Created
May 30, 2017 14:24
-
-
Save Kimundi/06c79383fa0887d302f5ae214e662907 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/CMakeLists.txt b/CMakeLists.txt | |
index 85d1e817..e649c1b5 100644 | |
--- a/CMakeLists.txt | |
+++ b/CMakeLists.txt | |
@@ -36,9 +36,10 @@ endif() | |
include(ExternalProject) | |
# More warnings and debug info | |
+# TODO: Add -Wextra -Wpedantic; they currently break building glog | |
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++14 -Wall") | |
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=gnu++14 -DNDEBUG -march=native") | |
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=gnu++14 -O0 -ggdb -DDEBUG") | |
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -march=native") | |
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb -DDEBUG") | |
find_package(Boost) | |
diff --git a/docs/Documentation.md b/docs/Documentation.md | |
index 373f7021..393beec8 100644 | |
--- a/docs/Documentation.md | |
+++ b/docs/Documentation.md | |
@@ -1004,7 +1004,7 @@ an ASCII encoding for single bits, as identified by the | |
~~~ {.cpp caption="coder_impl.cpp"} | |
template<typename value_t> | |
-inline void encode(value_t v, const BitRange& r) { | |
+inline void encode(value_t v, const BitRange&) { | |
// Encode single bits as ASCII | |
m_out->write_int(v ? '1' : '0'); | |
} | |
@@ -1014,7 +1014,7 @@ The same idea works with decoding: | |
~~~ {.cpp caption="coder_impl.cpp"} | |
template<typename value_t> | |
-inline value_t decode(const BitRange& r) { | |
+inline value_t decode(const BitRange&) { | |
// Decode an ASCII character and compare against '0' | |
uint8_t b = m_in->read_int<uint8_t>(); | |
return (b != '0'); | |
@@ -1475,7 +1475,7 @@ in the following snippet: | |
// Allocate memory, but only track mem2 | |
StatPhase::pause_tracking(); | |
char* mem1 = new char[1024]; | |
-StatPhase::resume_tracking(); | |
+StatPhase::resume_tracking(); | |
char* mem2 = new char[2048]; | |
diff --git a/etc/genregistry.py b/etc/genregistry.py | |
index 8c4fa9e3..30426109 100755 | |
--- a/etc/genregistry.py | |
+++ b/etc/genregistry.py | |
@@ -95,6 +95,7 @@ lcpc_buffer = [ | |
("lcpcomp::ScanDec", "compressors/lcpcomp/decompress/ScanDec.hpp", []), | |
("lcpcomp::DecodeForwardQueueListBuffer", "compressors/lcpcomp/decompress/DecodeQueueListBuffer.hpp", []), | |
("lcpcomp::CompactDec", "compressors/lcpcomp/decompress/CompactDec.hpp", []), | |
+ ("lcpcomp::MyMapBuffer", "compressors/lcpcomp/decompress/MyMapBuffer.hpp", []), | |
("lcpcomp::MultimapBuffer", "compressors/lcpcomp/decompress/MultiMapBuffer.hpp", []), | |
] | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/AUTHORS b/exttools/EM-SuccinctIrreducible-0.1.0/AUTHORS | |
deleted file mode 100644 | |
index ea0c897a..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/AUTHORS | |
+++ /dev/null | |
@@ -1,2 +0,0 @@ | |
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
-Dominik Kempa <dominik.kempa (at) gmail.com> | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/LICENCE b/exttools/EM-SuccinctIrreducible-0.1.0/LICENCE | |
deleted file mode 100644 | |
index f229d208..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/LICENCE | |
+++ /dev/null | |
@@ -1,24 +0,0 @@ | |
-Copyright (C) 2016 | |
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
-Dominik Kempa <dominik.kempa (at) gmail.com> | |
- | |
-Permission is hereby granted, free of charge, to any person | |
-obtaining a copy of this software and associated documentation | |
-files (the "Software"), to deal in the Software without | |
-restriction, including without limitation the rights to use, | |
-copy, modify, merge, publish, distribute, sublicense, and/or sell | |
-copies of the Software, and to permit persons to whom the | |
-Software is furnished to do so, subject to the following | |
-conditions: | |
- | |
-The above copyright notice and this permission notice shall be | |
-included in all copies or substantial portions of the Software. | |
- | |
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
-OTHER DEALINGS IN THE SOFTWARE. | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/README b/exttools/EM-SuccinctIrreducible-0.1.0/README | |
deleted file mode 100644 | |
index dfe8eb77..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/README | |
+++ /dev/null | |
@@ -1,50 +0,0 @@ | |
-EM-SuccinctIrreducible - external memory LCP array construction algorithm | |
-========================================================================= | |
- | |
- | |
-Description | |
------------ | |
- | |
-This package contains implementation of the external memory LCP array | |
-construction algorithm called EM-SuccinctIrreducible. The algorithm is | |
-described in the paper | |
- | |
- Juha Karkkainen and Dominik Kempa, | |
- Faster External Memory LCP Array Construction. | |
- In Proc. 24th European Symposium on Algorithms (ESA), 2016. | |
- | |
-The latest version of EM-SuccinctIrreducible is available at: | |
- http://www.cs.helsinki.fi/group/pads/ | |
- | |
- | |
- | |
-Compilation and usage | |
---------------------- | |
- | |
-2. Compile EM-SuccinctIrreducible using the provided Makefile | |
- | |
- $ cd src | |
- $ make | |
- | |
-This will produce six executables that allow computing the (P)LCP array | |
-of a given file sequentially and in parallel. For usage, run the | |
-programs without any arguments. EM-SuccinctIrreducible relies on the | |
-prior computation of suffix array and BWT for the input text. The | |
-suffix array for big files can be computed e.g. using the pSAscan | |
-algorithm, see https://www.cs.helsinki.fi/group/pads/pSAscan.html for | |
-more details. | |
- | |
- | |
- | |
-Terms of use | |
------------- | |
- | |
-EM-SuccinctIrreducible is released under the MIT/X11 license. See the | |
-file LICENCE for more details. | |
- | |
-If you use this code, please cite the paper mentioned above and publish | |
-the URL from which you downloaded the code. | |
- | |
- | |
- | |
-Helsinki, Aug 2016. | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/VERSION b/exttools/EM-SuccinctIrreducible-0.1.0/VERSION | |
deleted file mode 100644 | |
index 6e8bf73a..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/VERSION | |
+++ /dev/null | |
@@ -1 +0,0 @@ | |
-0.1.0 | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/Makefile b/exttools/EM-SuccinctIrreducible-0.1.0/src/Makefile | |
deleted file mode 100644 | |
index 4d668fde..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/Makefile | |
+++ /dev/null | |
@@ -1,37 +0,0 @@ | |
-SHELL = /bin/sh | |
-CC = $(CXX) | |
-CFLAGS = -Wall -Wextra -pedantic -Wshadow -funroll-loops -std=c++0x -O3 -DNDEBUG -pthread -march=native | |
-AUX_PAR_FLAGS = -fopenmp | |
-#CFLAGS = -Wall -Wextra -pedantic -Wshadow -std=c++0x -g2 -O3 -pthread | |
- | |
-all: construct_lcp_sequential \ | |
- construct_lcp_parallel \ | |
- construct_plcp_sequential \ | |
- construct_plcp_parallel \ | |
- construct_lcp_from_plcp_sequential \ | |
- construct_lcp_from_plcp_parallel | |
- | |
-construct_lcp_sequential: | |
- $(CC) $(CFLAGS) -o construct_lcp_sequential em_succinct_irreducible_src/utils.cpp main_construct_lcp_array.cpp | |
-construct_lcp_parallel: | |
- $(CC) $(CFLAGS) -o construct_lcp_parallel em_succinct_irreducible_src/utils.cpp main_construct_lcp_array.cpp $(AUX_PAR_FLAGS) | |
-construct_plcp_sequential: | |
- $(CC) $(CFLAGS) -o construct_plcp_sequential em_succinct_irreducible_src/utils.cpp main_construct_plcp_bitvector.cpp | |
-construct_plcp_parallel: | |
- $(CC) $(CFLAGS) -o construct_plcp_parallel em_succinct_irreducible_src/utils.cpp main_construct_plcp_bitvector.cpp $(AUX_PAR_FLAGS) | |
-construct_lcp_from_plcp_sequential: | |
- $(CC) $(CFLAGS) -o construct_lcp_from_plcp_sequential em_succinct_irreducible_src/utils.cpp main_construct_lcp_from_plcp.cpp | |
-construct_lcp_from_plcp_parallel: | |
- $(CC) $(CFLAGS) -o construct_lcp_from_plcp_parallel em_succinct_irreducible_src/utils.cpp main_construct_lcp_from_plcp.cpp $(AUX_PAR_FLAGS) | |
- | |
-clean: | |
- /bin/rm -f *.o | |
- | |
-nuclear: | |
- /bin/rm -f construct_lcp_sequential \ | |
- construct_lcp_parallel \ | |
- construct_plcp_sequential \ | |
- construct_plcp_parallel \ | |
- construct_lcp_from_plcp_sequential \ | |
- construct_lcp_from_plcp_parallel \ | |
- *.o | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_B.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_B.hpp | |
deleted file mode 100644 | |
index 1b17be1e..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_B.hpp | |
+++ /dev/null | |
@@ -1,500 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/compute_B.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_B_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_B_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <string> | |
-#include <vector> | |
-#include <algorithm> | |
-#include <omp.h> | |
- | |
-#include "io/async_stream_reader.hpp" | |
- | |
-#include "set_bits.hpp" | |
-#include "utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename ext_text_offset_type> | |
-void compute_B(std::uint64_t text_length, std::uint64_t *B, | |
- std::string irreducible_bits_filename, std::string C_filename, | |
- std::uint64_t phi_undefined_position, std::uint64_t &total_io_volume) { | |
- fprintf(stderr, " Compute bitvector encoding of PLCP array: "); | |
- long double start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Fill in the bits in B corresponding | |
- // to irreducible lcp values. | |
- { | |
- // Initialize reader of irreducible positions. | |
- typedef async_stream_reader<ext_text_offset_type> irreducible_bits_reader_type; | |
- irreducible_bits_reader_type *irreducible_bits_reader = | |
- new irreducible_bits_reader_type(irreducible_bits_filename); | |
- | |
- // Allocate the buffer. | |
- static const std::uint64_t buffer_size = (1UL << 20); | |
- ext_text_offset_type *buf = new ext_text_offset_type[buffer_size]; | |
-#ifdef _OPENMP | |
- ext_text_offset_type *tempbuf = new ext_text_offset_type[buffer_size]; | |
-#endif | |
- | |
- // Stream and set bits inside B. | |
- std::uint64_t count = utils::file_size(irreducible_bits_filename) / sizeof(ext_text_offset_type); | |
- { | |
- std::uint64_t items_processed = 0; | |
- while (items_processed < count) { | |
- std::uint64_t filled = std::min(count - items_processed, buffer_size); | |
- irreducible_bits_reader->read(buf, filled); | |
-#ifdef _OPENMP | |
- set_bits(B, 2UL * text_length, buf, filled, tempbuf); | |
-#else | |
- for (std::uint64_t j = 0; j < filled; ++j) { | |
- std::uint64_t idx = buf[j]; | |
- B[idx >> 6] |= (1UL << (idx & 63)); | |
- } | |
-#endif | |
- items_processed += filled; | |
- } | |
- } | |
- | |
- // Special case. | |
- { | |
- std::uint64_t idx = 2 * phi_undefined_position; | |
- B[idx >> 6] |= (1UL << (idx & 63)); | |
- } | |
- | |
- // Update I/O volume. | |
- io_volume += irreducible_bits_reader->bytes_read(); | |
- | |
- // Clean up. | |
-#ifdef _OPENMP | |
- delete[] tempbuf; | |
-#endif | |
- delete[] buf; | |
- delete irreducible_bits_reader; | |
- utils::file_delete(irreducible_bits_filename); | |
- } | |
- | |
- // Fill in reducible LCP values. | |
- { | |
- // Initialize reader of C. | |
- typedef async_stream_reader<std::uint64_t> C_reader_type; | |
- C_reader_type *C_reader = new C_reader_type(C_filename); | |
- | |
- // Initialize the bit-buffer for reader of C. | |
- std::uint64_t bitbuf = C_reader->read(); | |
- std::uint64_t bitbuf_pos = 0; | |
- bool C_bit = (bitbuf & (1UL << (bitbuf_pos++))); | |
- | |
- // Add reducible bits. | |
- for (std::uint64_t j = 0; j < 2UL * text_length; ++j) { | |
- // Set the bit in B. | |
- if (C_bit == 0) | |
- B[j >> 6] |= (1UL << (j & 63)); | |
- | |
- // Read the next bit from C. | |
- if (B[j >> 6] & (1UL << (j & 63))) { | |
- if (bitbuf_pos < 64 || C_reader->empty() == false) { | |
- if (bitbuf_pos == 64) { | |
- bitbuf = C_reader->read(); | |
- bitbuf_pos = 0; | |
- } | |
- C_bit = (bitbuf & (1UL << (bitbuf_pos++))); | |
- } | |
- } | |
- } | |
- | |
- // Update I/O volume. | |
- io_volume += C_reader->bytes_read(); | |
- | |
- // Clean up. | |
- delete C_reader; | |
- utils::file_delete(C_filename); | |
- } | |
- | |
- // Update I/O volume. | |
- total_io_volume += io_volume; | |
- | |
- // Print summary. | |
- long double elapsed = utils::wclock() - start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", | |
- elapsed, ((1.L * io_volume) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length); | |
-} | |
- | |
-template<typename ext_text_offset_type> | |
-void compute_B(std::uint64_t text_length, std::uint64_t max_block_size_B, | |
- std::uint64_t phi_undefined_position, std::string B_filename, std::string C_filename, | |
- std::string *irreducible_bits_filenames, std::uint64_t &total_io_volume) { | |
- std::uint64_t n_blocks_B = (2UL * text_length + max_block_size_B - 1) / max_block_size_B; | |
- | |
- fprintf(stderr, " Compute bitvector encoding of PLCP array: "); | |
- long double start = utils::wclock(); | |
- | |
- // Initialize reader of C. | |
- typedef async_stream_reader<std::uint64_t> C_reader_type; | |
- C_reader_type *C_reader = new C_reader_type(C_filename); | |
- | |
- // Initialize the bit-buffer for reader of C. | |
- std::uint64_t bitbuf = C_reader->read(); | |
- std::uint64_t bitbuf_pos = 0; | |
- bool C_bit = (bitbuf & (1UL << (bitbuf_pos++))); | |
- | |
- std::uint64_t io_vol = 0; | |
- std::uint64_t max_block_size_B_in_words = max_block_size_B / 64; | |
- std::uint64_t *B = new std::uint64_t[max_block_size_B_in_words]; | |
- std::FILE *f = utils::file_open(B_filename, "w"); | |
- | |
- // Allocate the buffer. | |
- static const std::uint64_t buffer_size = (1UL << 20); | |
- ext_text_offset_type *buf = new ext_text_offset_type[buffer_size]; | |
-#ifdef _OPENMP | |
- ext_text_offset_type *tempbuf = new ext_text_offset_type[buffer_size]; | |
-#endif | |
- | |
- for (std::uint64_t block_id = 0; block_id < n_blocks_B; ++block_id) { | |
- std::uint64_t block_beg = block_id * max_block_size_B; | |
- std::uint64_t block_end = std::min(block_beg + max_block_size_B, 2 * text_length); | |
- std::uint64_t block_size = block_end - block_beg; | |
- std::uint64_t block_size_in_words = (block_size + 63) / 64; | |
- | |
- // Zero-initialize the block of B. | |
- std::fill(B, B + block_size_in_words, 0UL); | |
- | |
- // Initialize the reader of irreducible positions. | |
- typedef async_stream_reader<ext_text_offset_type> irreducible_bits_reader_type; | |
- irreducible_bits_reader_type *irreducible_bits_reader = | |
- new irreducible_bits_reader_type(irreducible_bits_filenames[block_id]); | |
- | |
- // Read and set the bits in the block of B. | |
- std::uint64_t count = utils::file_size(irreducible_bits_filenames[block_id]) / sizeof(ext_text_offset_type); | |
- { | |
- std::uint64_t items_processed = 0; | |
- while (items_processed < count) { | |
- std::uint64_t filled = std::min(count - items_processed, buffer_size); | |
- irreducible_bits_reader->read(buf, filled); | |
-#ifdef _OPENMP | |
- #pragma omp parallel for | |
- for (std::uint64_t j = 0; j < filled; ++j) | |
- buf[j] = (std::uint64_t)buf[j] - block_beg; | |
- | |
- set_bits(B, block_size, buf, filled, tempbuf); | |
-#else | |
- for (std::uint64_t j = 0; j < filled; ++j) { | |
- std::uint64_t idx = buf[j]; | |
- std::uint64_t offset = idx - block_beg; | |
- B[offset >> 6] |= (1UL << (offset & 63)); | |
- } | |
-#endif | |
- | |
- items_processed += filled; | |
- } | |
- } | |
- | |
- // Special case for 1-bit corresponding to PLCP[SA[0]]. | |
- if (block_beg <= 2 * phi_undefined_position && 2 * phi_undefined_position < block_end) { | |
- std::uint64_t offset = 2 * phi_undefined_position - block_beg; | |
- B[offset >> 6] |= (1UL << (offset & 63)); | |
- } | |
- | |
- // Add reducible bits. | |
- for (std::uint64_t j = 0; j < block_size; ++j) { | |
- // Set the bit in B. | |
- if (C_bit == 0) | |
- B[j >> 6] |= (1UL << (j & 63)); | |
- | |
- // Read the next bit from C. | |
- if (B[j >> 6] & (1UL << (j & 63))) { | |
- if (bitbuf_pos < 64 || C_reader->empty() == false) { | |
- if (bitbuf_pos == 64) { | |
- bitbuf = C_reader->read(); | |
- bitbuf_pos = 0; | |
- } | |
- C_bit = (bitbuf & (1UL << (bitbuf_pos++))); | |
- } | |
- } | |
- } | |
- | |
- // Write current block of B to file. | |
- utils::write_to_file(B, block_size_in_words, f); | |
- | |
- // Update I/O volume. | |
- io_vol += irreducible_bits_reader->bytes_read() + block_size_in_words * sizeof(std::uint64_t); | |
- | |
- // Clean up. | |
- delete irreducible_bits_reader; | |
- utils::file_delete(irreducible_bits_filenames[block_id]); | |
- } | |
- | |
- // Update I/O volume. | |
- io_vol += C_reader->bytes_read(); | |
- total_io_volume += io_vol; | |
- | |
- // Clean up. | |
-#ifdef _OPENMP | |
- delete[] tempbuf; | |
-#endif | |
- delete[] buf; | |
- delete[] B; | |
- delete C_reader; | |
- std::fclose(f); | |
- utils::file_delete(C_filename); | |
- | |
- // Print summary. | |
- long double elapsed = utils::wclock() - start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", elapsed, | |
- ((1.L * io_vol) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length); | |
-} | |
- | |
-template<typename text_offset_type> | |
-std::uint64_t *compute_B(std::uint64_t text_length, std::string text_filename, | |
- std::string sa_filename, std::uint64_t &n_irreducible_lcps, | |
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) { | |
- // Initialize basic parameters. | |
- std::uint64_t local_n_irreducible_lcps = 0; | |
- std::uint64_t local_sum_irreducible_lcps = 0; | |
- | |
- // Allocate bitvectors. | |
- std::uint64_t C_size_in_words = (text_length + 63) / 64; | |
- std::uint64_t B_size_in_words = (2UL * text_length + 63) / 64; | |
- std::uint64_t *C = new std::uint64_t[C_size_in_words]; | |
- std::uint64_t *B = new std::uint64_t[B_size_in_words]; | |
- std::fill(C, C + C_size_in_words, 0UL); | |
- std::fill(B, B + B_size_in_words, 0UL); | |
- | |
- // Read text. | |
- std::uint8_t *text = new std::uint8_t[text_length]; | |
- { | |
- // Start the timer. | |
- fprintf(stderr, " Read text: "); | |
- long double read_start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Read data. | |
- utils::read_from_file(text, text_length, text_filename); | |
- | |
- // Update I/O volume. | |
- io_volume += text_length; | |
- total_io_volume += io_volume; | |
- | |
- // Print summary. | |
- long double read_time = utils::wclock() - read_start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", read_time, | |
- ((1.L * io_volume) / (1L << 20)) / read_time, (1.L * total_io_volume) / text_length); | |
- } | |
- | |
- // Compute irreducible lcp values. | |
- { | |
- // Start the timer. | |
- fprintf(stderr, " Compute irreducible LCP values: "); | |
- long double compute_irr_lcp_start = utils::wclock(); | |
- | |
- // Initialize basic statistics. | |
- std::uint64_t io_volume = 0; | |
- | |
- // Initialize SA reader. | |
- typedef async_stream_reader<text_offset_type> sa_reader_type; | |
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename); | |
- | |
- // Allocate buffers. | |
- static const std::uint64_t buf_size = (1UL << 20); | |
- text_offset_type *sa_buf = new text_offset_type[buf_size]; | |
- std::uint8_t *bwt_buf = new std::uint8_t[buf_size]; | |
-#ifdef _OPENMP | |
- std::uint64_t *pair_buf = new std::uint64_t[buf_size * 2]; | |
- std::uint64_t *ans_buf_B = new std::uint64_t[buf_size]; | |
- std::uint64_t *ans_buf_C = new std::uint64_t[buf_size]; | |
-#endif | |
- | |
- // Processing of SA follows. | |
- std::uint64_t sa_items_read = 0; | |
- std::uint64_t prev_sa = text_length; | |
- std::uint8_t prev_bwt = 0; | |
- while (sa_items_read < text_length) { | |
- std::uint64_t buf_filled = std::min(buf_size, text_length - sa_items_read); | |
- sa_reader->read(sa_buf, buf_filled); | |
- | |
- // Compute BWT buffer. | |
-#ifdef _OPENMP | |
- #pragma omp parallel for | |
- for (std::uint64_t j = 0; j < buf_filled; ++j) { | |
- std::uint64_t addr = (std::uint64_t)sa_buf[j]; | |
- if (addr > 0) bwt_buf[j] = text[addr - 1]; | |
- } | |
-#else | |
- for (std::uint64_t j = 0; j < buf_filled; ++j) { | |
- std::uint64_t addr = (std::uint64_t)sa_buf[j]; | |
- if (addr > 0) bwt_buf[j] = text[addr - 1]; | |
- } | |
-#endif | |
- | |
- // Process buffer. | |
-#ifdef _OPENMP | |
- { | |
- // Bring the irreducible pairs together. | |
- std::uint64_t buf_irr_filled = 0; | |
- for (std::uint64_t j = 0; j < buf_filled; ++j) { | |
- std::uint64_t cur_sa = (std::uint64_t)sa_buf[j]; | |
- std::uint8_t cur_bwt = bwt_buf[j]; | |
- if ((sa_items_read == 0 && j == 0) || (cur_sa == 0) || (prev_sa == 0) || (cur_bwt != prev_bwt)) { | |
- pair_buf[2 * buf_irr_filled] = cur_sa; | |
- pair_buf[2 * buf_irr_filled + 1] = prev_sa; | |
- ++buf_irr_filled; | |
- } | |
- prev_sa = cur_sa; | |
- prev_bwt = cur_bwt; | |
- } | |
- | |
- // Update statistics. | |
- local_n_irreducible_lcps += buf_irr_filled; | |
- | |
- if (buf_irr_filled > 0) { | |
- // Compute lcp values in parallel. | |
- #pragma omp parallel | |
- { | |
- std::uint64_t thread_sum_irreducible_lcps = 0; | |
- | |
- #pragma omp for nowait | |
- for (std::uint64_t j = 0; j < buf_irr_filled; ++j) { | |
- std::uint64_t i = pair_buf[2 * j]; | |
- std::uint64_t phi_i = pair_buf[2 * j + 1]; | |
- std::uint64_t lcp = 0; | |
- while (i + lcp < text_length && phi_i + lcp < text_length && | |
- text[i + lcp] == text[phi_i + lcp]) ++lcp; | |
- thread_sum_irreducible_lcps += lcp; | |
- ans_buf_C[j] = i; | |
- ans_buf_B[j] = 2 * i + lcp; | |
- } | |
- | |
- #pragma omp critical | |
- { | |
- local_sum_irreducible_lcps += thread_sum_irreducible_lcps; | |
- } | |
- } | |
- | |
- // Set the bits in B and C in parallel. | |
- set_bits(B, 2UL * text_length, ans_buf_B, buf_irr_filled, pair_buf); | |
- set_bits(C, 1UL * text_length, ans_buf_C, buf_irr_filled, pair_buf); | |
- } | |
- } | |
-#else | |
- for (std::uint64_t j = 0; j < buf_filled; ++j) { | |
- std::uint64_t cur_sa = (std::uint64_t)sa_buf[j]; | |
- std::uint8_t cur_bwt = bwt_buf[j]; | |
- if ((sa_items_read == 0 && j == 0) || (cur_sa == 0) || (prev_sa == 0) || (cur_bwt != prev_bwt)) { | |
- // Compute irreducible lcp(cur_sa, prev_sa) naively. | |
- std::uint64_t lcp = 0; | |
- while (cur_sa + lcp < text_length && prev_sa + lcp < text_length && | |
- text[cur_sa + lcp] == text[prev_sa + lcp]) ++lcp; | |
- | |
- // Set the corresponding bits in the B and C. | |
- std::uint64_t bv_idx = 2UL * cur_sa + lcp; | |
- B[bv_idx >> 6] |= (1UL << (bv_idx & 63)); | |
- C[cur_sa >> 6] |= (1UL << (cur_sa & 63)); | |
- | |
- // Update statistics. | |
- ++local_n_irreducible_lcps; | |
- local_sum_irreducible_lcps += lcp; | |
- } | |
- | |
- prev_sa = cur_sa; | |
- prev_bwt = cur_bwt; | |
- } | |
-#endif | |
- | |
- sa_items_read += buf_filled; | |
- } | |
- | |
- // Update I/O volume. | |
- io_volume += sa_reader->bytes_read(); | |
- total_io_volume += io_volume; | |
- | |
- // Clean up. | |
- delete[] sa_buf; | |
- delete[] bwt_buf; | |
- delete sa_reader; | |
-#ifdef _OPENMP | |
- delete[] pair_buf; | |
- delete[] ans_buf_B; | |
- delete[] ans_buf_C; | |
-#endif | |
- | |
- // Print summary. | |
- long double compute_irr_lcp_time = utils::wclock() - compute_irr_lcp_start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB, total I/O vol = %.2Lfn\n", compute_irr_lcp_time, | |
- ((1.L * io_volume) / (1L << 20)) / compute_irr_lcp_time, (1.L * total_io_volume) / text_length); | |
- } | |
- | |
- // Clean up. | |
- delete[] text; | |
- | |
- // Fill in reducible LCP values. | |
- { | |
- fprintf(stderr, " Fill missing reducible LCP values: "); | |
- long double fill_in_reduc_start = utils::wclock(); | |
- | |
- std::uint64_t B_ptr = 0; | |
- for (std::uint64_t j = 0; j < text_length; ++j) { | |
- if ((C[j >> 6] & (1UL << (j & 63))) == 0) { | |
- // Mark the 1-bit corresponding to reducible LCP value. | |
- B[B_ptr >> 6] |= (1UL << (B_ptr & 63)); | |
- } else { | |
- // Find the next 1-bit in B. | |
- while ((B[B_ptr >> 6] & (1UL << (B_ptr & 63))) == 0) | |
- ++B_ptr; | |
- } | |
- ++B_ptr; | |
- } | |
- | |
- // Print summary. | |
- long double fill_in_reduc_time = utils::wclock() - fill_in_reduc_start; | |
- fprintf(stderr, "time = %.2Lfs\n", fill_in_reduc_time); | |
- } | |
- | |
- // Clean up. | |
- delete[] C; | |
- | |
- // Update reference variables. | |
- n_irreducible_lcps = local_n_irreducible_lcps; | |
- sum_irreducible_lcps = local_sum_irreducible_lcps; | |
- | |
- // Return the pointer to B. | |
- return B; | |
-} | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_B_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_array.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_array.hpp | |
deleted file mode 100644 | |
index a1c7e0ec..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_array.hpp | |
+++ /dev/null | |
@@ -1,143 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/compute_lcp_array.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_ARRAY_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_ARRAY_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstdint> | |
-#include <ctime> | |
-#include <string> | |
-#include <limits> | |
-#include <algorithm> | |
-#include <unistd.h> | |
- | |
-#include "compute_plcp_bitvector.hpp" | |
-#include "compute_lcp_from_plcp.hpp" | |
-#include "utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-void compute_lcp_array(std::uint64_t text_length, std::uint64_t ram_use, | |
- std::string text_filename, std::string sa_filename, std::string bwt_filename, | |
- std::string output_filename, std::uint64_t &max_lcp, std::uint64_t &lcp_sum, | |
- std::uint64_t &n_irreducible_lcps, std::uint64_t &sum_irreducible_lcps, | |
- std::uint64_t &total_io_volume) { | |
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use; | |
- if (text_to_ram_ratio > 4.0L) { | |
- // Not enough RAM to hold B in RAM. | |
- std::string B_filename = output_filename + ".plcp." + utils::random_string_hash(); | |
- compute_plcp_bitvector_small_ram<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename, | |
- sa_filename, bwt_filename, B_filename, n_irreducible_lcps, sum_irreducible_lcps, total_io_volume); | |
- | |
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, sa_filename, | |
- output_filename, B_filename, total_io_volume, max_lcp, lcp_sum); | |
- } else { | |
- // Enough RAM to hold B in RAM. | |
- std::uint64_t *B = compute_plcp_bitvector_large_ram<text_offset_type, ext_text_offset_type>(text_length, | |
- ram_use, text_filename, sa_filename, bwt_filename, output_filename, n_irreducible_lcps, | |
- sum_irreducible_lcps, total_io_volume); | |
- | |
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, B, sa_filename, | |
- output_filename, total_io_volume, max_lcp, lcp_sum); | |
- } | |
-} | |
- | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-void compute_lcp_array(std::string text_filename, std::string sa_filename, | |
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) { | |
- srand(time(0) + getpid()); | |
- utils::drop_disk_pages(text_filename); | |
- utils::drop_disk_pages(sa_filename); | |
- utils::drop_disk_pages(bwt_filename); | |
- long double global_start = utils::wclock(); | |
- | |
- // Initialize basic parameters. | |
- std::uint64_t text_length = utils::file_size(text_filename); | |
- std::uint64_t lcp_sum = 0; | |
- std::uint64_t max_lcp = 0; | |
- std::uint64_t n_irreducible_lcps = 0; | |
- std::uint64_t sum_irreducible_lcps = 0; | |
- std::uint64_t total_io_volume = 0; | |
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use; | |
- | |
- if (text_length == 0) { | |
- fprintf(stderr, "Error: the input file is empty!\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Turn paths absolute. | |
- text_filename = utils::absolute_path(text_filename); | |
- sa_filename = utils::absolute_path(sa_filename); | |
- bwt_filename = utils::absolute_path(bwt_filename); | |
- output_filename = utils::absolute_path(output_filename); | |
- | |
- // Print summary of basic parameters. | |
- fprintf(stderr, "Text filename = %s\n", text_filename.c_str()); | |
- fprintf(stderr, "SA filename = %s\n", sa_filename.c_str()); | |
- fprintf(stderr, "BWT filename = %s\n", bwt_filename.c_str()); | |
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str()); | |
- fprintf(stderr, "Text length = %lu (%.2LfMiB)\n", text_length, 1.L * text_length / (1 << 20)); | |
- fprintf(stderr, "Text size / ram_use = %.2Lf\n", text_to_ram_ratio); | |
- fprintf(stderr, "RAM use = %lu (%.2LfMiB)\n", ram_use, ram_use / (1024.L * 1024)); | |
- fprintf(stderr, "sizeof(text_offset_type) = %lu\n", sizeof(text_offset_type)); | |
- fprintf(stderr, "sizeof(ext_text_offset_type) = %lu\n", sizeof(ext_text_offset_type)); | |
-#ifdef _OPENMP | |
- fprintf(stderr, "Max number of threads = %d\n", omp_get_max_threads()); | |
-#endif | |
- fprintf(stderr, "\n"); | |
- | |
- compute_lcp_array<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename, | |
- sa_filename, bwt_filename, output_filename, max_lcp, lcp_sum, | |
- n_irreducible_lcps, sum_irreducible_lcps, total_io_volume); | |
- | |
- // Print summary. | |
- long double total_time = utils::wclock() - global_start; | |
- long double avg_lcp = (long double)lcp_sum / text_length; | |
- fprintf(stderr, "\n\nComputation finished. Summary:\n"); | |
- fprintf(stderr, " elapsed time = %.2Lfs (%.3Lfs/MiB of text)\n", total_time, total_time / (1.L * text_length / (1L << 20))); | |
- fprintf(stderr, " speed = %.2LfMiB of text/s\n", (1.L * text_length / (1L << 20)) / total_time); | |
- fprintf(stderr, " I/O volume = %lu (%.2Lfbytes/input symbol)\n", total_io_volume, (1.L * total_io_volume) / text_length); | |
- fprintf(stderr, " number of irreducible LCPs = %lu\n", n_irreducible_lcps); | |
- fprintf(stderr, " sum of irreducible LCPs = %lu\n", sum_irreducible_lcps); | |
- fprintf(stderr, " sum of all LCPs = %lu\n", lcp_sum); | |
- fprintf(stderr, " average LCP = %.2Lf\n", avg_lcp); | |
- fprintf(stderr, " maximal LCP = %lu\n", max_lcp); | |
-} | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_ARRAY_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_from_plcp.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_from_plcp.hpp | |
deleted file mode 100644 | |
index 23e99ae4..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_from_plcp.hpp | |
+++ /dev/null | |
@@ -1,388 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/compute_lcp_from_plcp.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_FROM_PLCP_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_FROM_PLCP_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <string> | |
-#include <algorithm> | |
-#include <omp.h> | |
- | |
-#include "io/async_stream_reader.hpp" | |
-#include "io/async_stream_writer.hpp" | |
-#include "io/async_multi_stream_writer.hpp" | |
-#include "io/async_multipart_file_writer.hpp" | |
-#include "io/async_multipart_multifile_reader.hpp" | |
-#include "utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename text_offset_type> | |
-void compute_lcp_from_plcp(std::uint64_t text_length, std::uint64_t ram_use, | |
- std::string sa_filename, std::string output_filename, std::string B_filename, | |
- std::uint64_t &global_io_volume, std::uint64_t &max_lcp, std::uint64_t &lcp_sum, | |
- bool keep_plcp = false) { | |
- fprintf(stderr, "Convert PLCP to LCP:\n"); | |
- long double convert_plcp_to_lcp_start = utils::wclock(); | |
- | |
- // Initialize basic parameters. | |
- std::uint64_t max_block_size = ram_use / sizeof(text_offset_type); | |
- std::uint64_t n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- std::uint64_t local_lcp_sum = 0; | |
- std::uint64_t local_max_lcp = 0; | |
- std::uint64_t total_io_volume = 0; | |
- | |
- // Print info about blocks. | |
- fprintf(stderr, " Max block size = %lu (%.2LfMiB)\n", max_block_size, (1.L * max_block_size) / (1L << 20)); | |
- fprintf(stderr, " Number of blocks = %lu\n", n_blocks); | |
- | |
- // Set the filenames of files storing SA and LCP subsequences. | |
- std::string *sa_subsequences_filenames = new std::string[n_blocks]; | |
- std::string *lcp_subsequences_filenames = new std::string[n_blocks]; | |
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) { | |
- sa_subsequences_filenames[block_id] = output_filename + ".sa_subseq." + utils::intToStr(block_id) + "." + utils::random_string_hash(); | |
- lcp_subsequences_filenames[block_id] = output_filename + ".lcp_sebseq." + utils::intToStr(block_id) + "." + utils::random_string_hash(); | |
- } | |
- | |
- // Compute SA subsequences. | |
- { | |
- fprintf(stderr, " Compute SA subsequences: "); | |
- long double compute_sa_subseq_start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Initialize streaming of suffix array. | |
- typedef async_stream_reader<text_offset_type> sa_reader_type; | |
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename); | |
- | |
- // Initialize multifile writer of SA subsequences. | |
- static const std::uint64_t n_free_buffers = 4; | |
- std::uint64_t total_buffers_ram = ram_use; | |
- std::uint64_t buffer_size = std::min((16UL << 20), total_buffers_ram / (n_blocks + n_free_buffers)); | |
- typedef async_multi_stream_writer<text_offset_type> sa_multiwriter_type; | |
- sa_multiwriter_type *sa_multiwriter = new sa_multiwriter_type(buffer_size, n_free_buffers); | |
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) | |
- sa_multiwriter->add_file(sa_subsequences_filenames[block_id]); | |
- | |
- // Read SA / write SA subsequences. | |
- for (std::uint64_t j = 0; j < text_length; ++j) { | |
- std::uint64_t sa_j = sa_reader->read(); | |
- std::uint64_t block_id = sa_j / max_block_size; | |
- sa_multiwriter->write_to_ith_file(block_id, sa_j); | |
- } | |
- | |
- // Update I/O volume. | |
- io_volume += sa_reader->bytes_read() + sa_multiwriter->bytes_written(); | |
- total_io_volume += io_volume; | |
- | |
- // Clean up. | |
- delete sa_reader; | |
- delete sa_multiwriter; | |
- | |
- // Print summary. | |
- long double compute_sa_subseq_time = utils::wclock() - compute_sa_subseq_start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", compute_sa_subseq_time, | |
- ((1.L * io_volume) / (1L << 20)) / compute_sa_subseq_time, (1.L * total_io_volume) / text_length); | |
- } | |
- | |
- // Compute LCP subsequences. | |
- { | |
- fprintf(stderr, " Compute LCP subsequences: "); | |
- long double compute_lcp_subseq_start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Allocate the array holding the block of PLCP. | |
- text_offset_type *plcp_block = new text_offset_type[max_block_size]; | |
- | |
- // Initialize reading of PLCP bitvector. | |
- typedef async_stream_reader<std::uint64_t> plcp_bitvector_reader_type; | |
- plcp_bitvector_reader_type *plcp_bitvector_reader = new plcp_bitvector_reader_type(B_filename); | |
- std::uint64_t bitbuf = plcp_bitvector_reader->read(); | |
- std::uint64_t bitpos = 0; | |
- std::uint64_t cur_plcp = 1; | |
- | |
- // Allocate buffer. | |
- static const std::uint64_t buffer_size = (1UL << 20); | |
- text_offset_type *buf = new text_offset_type[buffer_size]; | |
- text_offset_type *outbuf = new text_offset_type[buffer_size]; | |
- | |
- // Process blocks left to right. | |
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) { | |
- std::uint64_t block_beg = block_id * max_block_size; | |
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length); | |
- std::uint64_t block_size = block_end - block_beg; | |
- | |
- // Read a block of PLCP into RAM. | |
- for (std::uint64_t j = 0; j < block_size; ++j) { | |
- // Increment cur_plcp for every 0 in the bitvector. | |
- while ((bitbuf & (1UL << bitpos)) == 0) { | |
- ++cur_plcp; | |
- ++bitpos; | |
- if (bitpos == 64) { | |
- bitbuf = plcp_bitvector_reader->read(); | |
- bitpos = 0; | |
- } | |
- } | |
- | |
- // We decrement last because cur_plcp is unsigned. | |
- --cur_plcp; | |
- plcp_block[j] = cur_plcp; | |
- | |
- // Skip the 1-bit in the bitvector. | |
- ++bitpos; | |
- if (bitpos == 64) { | |
- if (plcp_bitvector_reader->empty() == false) | |
- bitbuf = plcp_bitvector_reader->read(); | |
- bitpos = 0; | |
- } | |
- } | |
- | |
- // Compute LCP subsequence and write to file. | |
- { | |
- // Initialize SA subsequence reader. | |
- typedef async_stream_reader<text_offset_type> sa_subseq_reader_type; | |
- sa_subseq_reader_type *sa_subseq_reader = | |
- new sa_subseq_reader_type(sa_subsequences_filenames[block_id]); | |
- | |
- // Initialize LCP subsequence writer. | |
- std::uint64_t single_file_max_bytes = text_length / (n_blocks * 2UL); // 10UL | |
- typedef async_multipart_file_writer<text_offset_type> lcp_subseq_writer_type; | |
- lcp_subseq_writer_type *lcp_subseq_writer = | |
- new lcp_subseq_writer_type(lcp_subsequences_filenames[block_id], single_file_max_bytes); | |
- | |
- // Compute LCP subsequence. | |
- std::uint64_t subseq_size = utils::file_size(sa_subsequences_filenames[block_id]) / sizeof(text_offset_type); | |
- std::uint64_t items_processed = 0; | |
- while (items_processed < subseq_size) { | |
- std::uint64_t filled = std::min(buffer_size, subseq_size - items_processed); | |
- sa_subseq_reader->read(buf, filled); | |
-#ifdef _OPENMP | |
- #pragma omp parallel for | |
- for (std::uint64_t j = 0; j < filled; ++j) { | |
- std::uint64_t sa_val = buf[j]; | |
- std::uint64_t lcp_val = plcp_block[sa_val - block_beg]; | |
- outbuf[j] = lcp_val; | |
- } | |
-#else | |
- for (std::uint64_t j = 0; j < filled; ++j) { | |
- std::uint64_t sa_val = buf[j]; | |
- std::uint64_t lcp_val = plcp_block[sa_val - block_beg]; | |
- outbuf[j] = lcp_val; | |
- } | |
-#endif | |
- lcp_subseq_writer->write(outbuf, filled); | |
- items_processed += filled; | |
- } | |
- | |
- // Update I/O volume. | |
- io_volume += sa_subseq_reader->bytes_read() + lcp_subseq_writer->bytes_written(); | |
- | |
- // Clean up. | |
- delete sa_subseq_reader; | |
- delete lcp_subseq_writer; | |
- } | |
- | |
- utils::file_delete(sa_subsequences_filenames[block_id]); | |
- } | |
- | |
- // Update I/O volume. | |
- io_volume += plcp_bitvector_reader->bytes_read(); | |
- total_io_volume += io_volume; | |
- | |
- // Clean up. | |
- delete[] buf; | |
- delete[] outbuf; | |
- delete[] plcp_block; | |
- delete plcp_bitvector_reader; | |
- if (keep_plcp == false) | |
- utils::file_delete(B_filename); | |
- | |
- // Print summary. | |
- long double compute_lcp_subseq_time = utils::wclock() - compute_lcp_subseq_start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", compute_lcp_subseq_time, | |
- ((1.L * io_volume) / (1L << 20)) / compute_lcp_subseq_time, (1.L * total_io_volume) / text_length); | |
- } | |
- | |
- // Merge LCP subsequences. | |
- { | |
- fprintf(stderr, " Merge LCP subsequences: "); | |
- long double merge_lcp_subseq_start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Initialize the reader of LCP subsequences. | |
- std::uint64_t total_buffers_ram = ram_use; | |
- std::uint64_t buffer_size = total_buffers_ram / (2UL * n_blocks); | |
- typedef async_multipart_multifile_reader<text_offset_type> lcp_subseq_multireader_type; | |
- lcp_subseq_multireader_type *lcp_subseq_multireader = | |
- new lcp_subseq_multireader_type(n_blocks, buffer_size); | |
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) | |
- lcp_subseq_multireader->add_file(lcp_subsequences_filenames[block_id]); | |
- | |
- // Initialize the writer of the final LCP array. | |
- typedef async_stream_writer<text_offset_type> lcp_writer_type; | |
- lcp_writer_type *lcp_writer = new lcp_writer_type(output_filename); | |
- | |
- // Initialize the reader of SA. | |
- typedef async_stream_reader<text_offset_type> sa_reader_type; | |
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename); | |
- | |
- // Compute final LCP. | |
- for (std::uint64_t j = 0; j < text_length; ++j) { | |
- std::uint64_t sa_j = sa_reader->read(); | |
- std::uint64_t block_id = sa_j / max_block_size; | |
- std::uint64_t lcp_j = lcp_subseq_multireader->read_from_ith_file(block_id); | |
- local_max_lcp = std::max(local_max_lcp, lcp_j); | |
- local_lcp_sum += lcp_j; | |
- lcp_writer->write(lcp_j); | |
- } | |
- | |
- // Update I/O volume. | |
- io_volume += sa_reader->bytes_read() + lcp_subseq_multireader->bytes_read() + lcp_writer->bytes_written(); | |
- total_io_volume += io_volume; | |
- | |
- // Clean up. | |
- delete sa_reader; | |
- delete lcp_writer; | |
- delete lcp_subseq_multireader; | |
- | |
- // Print summary. | |
- long double merge_lcp_subseq_time = utils::wclock() - merge_lcp_subseq_start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", merge_lcp_subseq_time, | |
- ((1.L * io_volume) / (1L << 20)) / merge_lcp_subseq_time, (1.L * total_io_volume) / text_length); | |
- } | |
- | |
- // Clean up. | |
- delete[] sa_subsequences_filenames; | |
- delete[] lcp_subsequences_filenames; | |
- | |
- // Print summary. | |
- long double convert_plcp_to_lcp_time = utils::wclock() - convert_plcp_to_lcp_start; | |
- fprintf(stderr, "Summary: time = %.2Lfs, total I/O vol = %.2Lfn\n", | |
- convert_plcp_to_lcp_time, (1.L * total_io_volume) / text_length); | |
- | |
- // Update reference variables. | |
- global_io_volume += total_io_volume; | |
- max_lcp = local_max_lcp; | |
- lcp_sum = local_lcp_sum; | |
-} | |
- | |
-template<typename text_offset_type> | |
-void compute_lcp_from_plcp(std::uint64_t text_length, std::uint64_t ram_use, std::uint64_t *B, | |
- std::string sa_filename, std::string output_filename, std::uint64_t &total_io_volume, | |
- std::uint64_t &max_lcp, std::uint64_t &lcp_sum) { | |
- // Write B to disk. | |
- std::string B_filename = output_filename + ".plcp." + utils::random_string_hash(); | |
- { | |
- // Start the timer. | |
- fprintf(stderr, "Write PLCP bitvector to disk: "); | |
- long double write_plcp_start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Write the data. | |
- std::uint64_t length_of_B_in_words = (2UL * text_length + 63) / 64; | |
- utils::write_to_file(B, length_of_B_in_words, B_filename); | |
- | |
- // Update I/O volume. | |
- io_volume += length_of_B_in_words * sizeof(std::uint64_t); | |
- total_io_volume += io_volume; | |
- long double write_plcp_time = utils::wclock() - write_plcp_start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, I/O vol = %.2Lfn\n\n", write_plcp_time, | |
- ((1.L * io_volume) / (1L << 20)) / write_plcp_time, (1.L * io_volume) / text_length); | |
- } | |
- delete[] B; | |
- | |
- // Convert PLCP to LCP using EM method. | |
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, sa_filename, | |
- output_filename, B_filename, total_io_volume, max_lcp, lcp_sum); | |
-} | |
- | |
-template<typename text_offset_type> | |
-void compute_lcp_from_plcp(std::string input_filename, std::string sa_filename, | |
- std::string output_filename, std::uint64_t ram_use) { | |
- srand(time(0) + getpid()); | |
- utils::drop_disk_pages(input_filename); | |
- utils::drop_disk_pages(sa_filename); | |
- long double start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Compute basic parameters. | |
- std::uint64_t text_length = utils::file_size(sa_filename) / sizeof(text_offset_type); | |
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use; | |
- | |
- if (text_length == 0) { | |
- fprintf(stderr, "Error: the input file is empty!\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Turn paths absolute. | |
- input_filename = utils::absolute_path(input_filename); | |
- sa_filename = utils::absolute_path(sa_filename); | |
- output_filename = utils::absolute_path(output_filename); | |
- | |
- // Print summary of basic parameters. | |
- fprintf(stderr, "PLCP filename = %s\n", input_filename.c_str()); | |
- fprintf(stderr, "SA filename = %s\n", sa_filename.c_str()); | |
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str()); | |
- fprintf(stderr, "Text length = %lu (%.2LfMiB)\n", text_length, 1.L * text_length / (1 << 20)); | |
- fprintf(stderr, "Text size / ram_use = %.2Lf\n", text_to_ram_ratio); | |
- fprintf(stderr, "RAM use = %lu (%.2LfMiB)\n", ram_use, ram_use / (1024.L * 1024)); | |
- fprintf(stderr, "sizeof(text_offset_type) = %lu\n", sizeof(text_offset_type)); | |
-#ifdef _OPENMP | |
- fprintf(stderr, "Max number of threads = %d\n", omp_get_max_threads()); | |
-#endif | |
- fprintf(stderr, "\n"); | |
- | |
- std::uint64_t lcp_sum = 0; | |
- std::uint64_t max_lcp = 0; | |
- | |
- // Convert the PCLP array (bitvector representation) to LCP array. | |
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, sa_filename, | |
- output_filename, input_filename, io_volume, max_lcp, lcp_sum, true); | |
- | |
- // Print summary. | |
- long double total_time = utils::wclock() - start; | |
- long double avg_lcp = (long double)lcp_sum / text_length; | |
- fprintf(stderr, "\n\nComputation finished. Summary:\n"); | |
- fprintf(stderr, " elapsed time = %.2Lfs (%.3Lfs/MiB of text)\n", total_time, total_time / (1.L * text_length / (1L << 20))); | |
- fprintf(stderr, " speed = %.2LfMiB of text/s\n", (1.L * text_length / (1L << 20)) / total_time); | |
- fprintf(stderr, " I/O volume = %lu (%.2Lfbytes/input symbol)\n", io_volume, (1.L * io_volume) / text_length); | |
- fprintf(stderr, " sum of all LCPs = %lu\n", lcp_sum); | |
- fprintf(stderr, " average LCP = %.2Lf\n", avg_lcp); | |
- fprintf(stderr, " maximal LCP = %lu\n", max_lcp); | |
-} | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_FROM_PLCP_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_plcp_bitvector.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_plcp_bitvector.hpp | |
deleted file mode 100644 | |
index e9724e39..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_plcp_bitvector.hpp | |
+++ /dev/null | |
@@ -1,318 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/compute_plcp_bitvector.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_PLCP_BITVECTOR_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_PLCP_BITVECTOR_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <ctime> | |
-#include <string> | |
-#include <algorithm> | |
-#include <omp.h> | |
-#include <unistd.h> | |
- | |
-#include "utils.hpp" | |
-#include "distribute_pairs_and_compute_C.hpp" | |
-#include "process_halfsegment_pairs.hpp" | |
-#include "compute_B.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-// A version that returns the B bitvector as a file on disk. | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-void compute_plcp_bitvector_small_ram(std::uint64_t text_length, std::uint64_t ram_use, | |
- std::string text_filename, std::string sa_filename, std::string bwt_filename, | |
- std::string B_filename, std::uint64_t &n_irreducible_lcps, | |
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) { | |
- fprintf(stderr, "Compute PLCP bitvector (dest = EM):\n"); | |
- long double compute_plcp_bitvector_start = utils::wclock(); | |
- | |
- // Initialize basic parameters. | |
- static const std::uint64_t max_overflow_size = (1UL << 20); | |
- std::uint64_t max_halfsegment_size = std::max(1UL, ram_use / 2); | |
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size; | |
- std::uint64_t n_different_halfsegment_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2; | |
- std::uint64_t io_volume = 0; | |
- std::uint64_t max_block_size_B = std::max(64UL, (((ram_use * 8UL) >> 6) << 6)); | |
- std::uint64_t n_blocks_B = (2UL * text_length + max_block_size_B - 1) / max_block_size_B; | |
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use; | |
- | |
- // Print info about halfsegments. | |
- fprintf(stderr, " Max halfsegment size = %lu (%.2LfMiB)\n", max_halfsegment_size, (1.L * max_halfsegment_size / (1UL << 20))); | |
- fprintf(stderr, " Number of halfsegments = %lu\n", n_halfsegments); | |
- fprintf(stderr, " Number of halfsegment pairs = %lu\n", n_different_halfsegment_pairs); | |
- | |
- // Initialize file names with halfsegment pairs. | |
- std::string **pairs_filenames = new std::string*[n_halfsegments]; | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) { | |
- pairs_filenames[i] = new std::string[n_halfsegments]; | |
- for (std::uint64_t j = i; j < n_halfsegments; ++j) { | |
- std::string filename = B_filename + ".pairs." + utils::intToStr(i) + "_" + utils::intToStr(j); | |
- pairs_filenames[i][j] = filename; | |
- } | |
- } | |
- | |
- // Distribute pairs (i, Phi[i]) such that PLCP[i] is irreducible | |
- // into files corresponding to different halfsegment pairs and | |
- // compute the C bitvector. | |
- std::string C_filename = B_filename + ".irreducible_positions_bv"; | |
- std::uint64_t phi_undefined_position = 0; | |
- if (text_to_ram_ratio > 8.0L) { | |
- // Distribute pairs. | |
- phi_undefined_position = distribute_pairs<text_offset_type>(text_length, max_halfsegment_size, | |
- ram_use, sa_filename, bwt_filename, pairs_filenames, n_irreducible_lcps, io_volume); | |
- | |
- // Compute C. | |
- compute_C<text_offset_type>(text_length, max_halfsegment_size, ram_use, phi_undefined_position, | |
- pairs_filenames, sa_filename, bwt_filename, C_filename, io_volume); | |
- } else { | |
- // Distribute pairs and compute C. | |
- phi_undefined_position = distribute_pairs_and_compute_C<text_offset_type>(text_length, | |
- max_halfsegment_size, ram_use, sa_filename, bwt_filename, C_filename, | |
- pairs_filenames, n_irreducible_lcps, io_volume); | |
- } | |
- | |
- std::string *irreducible_bits_filenames = new std::string[n_blocks_B]; | |
- for (std::uint64_t block_id = 0; block_id < n_blocks_B; ++block_id) { | |
- std::string filename = B_filename + ".irreducible_bits_bv." + utils::intToStr(block_id); | |
- irreducible_bits_filenames[block_id] = filename; | |
- } | |
- | |
- // Process all pairs of halfsegments. | |
- sum_irreducible_lcps = process_halfsegment_pairs<text_offset_type, ext_text_offset_type>(text_filename, | |
- text_length, max_block_size_B, max_halfsegment_size, max_overflow_size, | |
- pairs_filenames, irreducible_bits_filenames, io_volume); | |
- | |
- // Clean up. | |
- for (std::uint64_t halfseg_id = 0; halfseg_id < n_halfsegments; ++halfseg_id) | |
- delete[] pairs_filenames[halfseg_id]; | |
- delete[] pairs_filenames; | |
- | |
- // Compute B. | |
- compute_B<ext_text_offset_type>(text_length, max_block_size_B, phi_undefined_position, | |
- B_filename, C_filename, irreducible_bits_filenames, io_volume); | |
- | |
- // Update I/O volume. | |
- total_io_volume += io_volume; | |
- | |
- // Clean up. | |
- delete[] irreducible_bits_filenames; | |
- | |
- // Print summary. | |
- long double compute_plcp_bitvector_time = utils::wclock() - compute_plcp_bitvector_start; | |
- fprintf(stderr, "Summary: time = %.2Lfs, total I/O vol = %.2Lfn\n\n", | |
- compute_plcp_bitvector_time, (1.L * io_volume) / text_length); | |
-} | |
- | |
-// A version, that returns a pointer to B bitvector. Requires at least 2n bits of RAM. | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-std::uint64_t* compute_plcp_bitvector_large_ram(std::uint64_t text_length, std::uint64_t ram_use, | |
- std::string text_filename, std::string sa_filename, std::string bwt_filename, | |
- std::string output_filename, std::uint64_t &n_irreducible_lcps, | |
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) { | |
- fprintf(stderr, "Compute PLCP bitvector (dest = RAM):\n"); | |
- long double compute_plcp_bitvector_start = utils::wclock(); | |
- | |
- // Initialize basic parameters. | |
- long double ram_to_text_ratio = (long double)ram_use / (long double)text_length; | |
- std::uint64_t *B = NULL; | |
- std::uint64_t io_volume = 0; | |
- | |
- if (ram_to_text_ratio < 1.375L) { | |
- // Initialize basic parameters. | |
- static const std::uint64_t max_overflow_size = (1UL << 20); | |
- std::uint64_t max_halfsegment_size = std::max(1UL, ram_use / 2); | |
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size; | |
- std::uint64_t n_different_halfsegment_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2; | |
- | |
- // Print info about halfsegments. | |
- fprintf(stderr, " Max halfsegment size = %lu (%.2LfMiB)\n", max_halfsegment_size, (1.L * max_halfsegment_size / (1UL << 20))); | |
- fprintf(stderr, " Number of halfsegments = %lu\n", n_halfsegments); | |
- fprintf(stderr, " Number of halfsegment pairs = %lu\n", n_different_halfsegment_pairs); | |
- | |
- // Initialize file names with halfsegment pairs. | |
- std::string **pairs_filenames = new std::string*[n_halfsegments]; | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) { | |
- pairs_filenames[i] = new std::string[n_halfsegments]; | |
- for (std::uint64_t j = i; j < n_halfsegments; ++j) { | |
- std::string filename = output_filename + ".pairs." + utils::intToStr(i) + "_" + utils::intToStr(j); | |
- pairs_filenames[i][j] = filename; | |
- } | |
- } | |
- | |
- // Distribute pairs (i, Phi[i]) such that PLCP[i] is irreducible | |
- // into files corresponding to different halfsegment pairs and | |
- // compute the C bitvector. | |
- std::string C_filename = output_filename + ".irreducible_positions_bv"; | |
- std::uint64_t phi_undefined_position = distribute_pairs_and_compute_C<text_offset_type>(text_length, | |
- max_halfsegment_size, ram_use, sa_filename, bwt_filename, C_filename, pairs_filenames, | |
- n_irreducible_lcps, io_volume); | |
- | |
- // Process all pairs of halfsegments. | |
- std::string irreducible_bits_filename = output_filename + ".irreducible_bits"; | |
- sum_irreducible_lcps = process_halfsegment_pairs<text_offset_type, ext_text_offset_type>(text_filename, | |
- text_length, max_halfsegment_size, max_overflow_size, pairs_filenames, | |
- irreducible_bits_filename, io_volume); | |
- | |
- // Clean up. | |
- for (std::uint64_t halfseg_id = 0; halfseg_id < n_halfsegments; ++halfseg_id) | |
- delete[] pairs_filenames[halfseg_id]; | |
- delete[] pairs_filenames; | |
- | |
- // Allocate B. | |
- std::uint64_t B_size_in_words = (2UL * text_length + 63) / 64; | |
- B = new std::uint64_t[B_size_in_words]; | |
- std::fill(B, B + B_size_in_words, 0UL); | |
- | |
- // Compute B. | |
- compute_B<ext_text_offset_type>(text_length, B, irreducible_bits_filename, | |
- C_filename, phi_undefined_position, io_volume); | |
- } else { | |
- // Compute B. | |
- B = compute_B<text_offset_type>(text_length, text_filename, sa_filename, | |
- n_irreducible_lcps, sum_irreducible_lcps, io_volume); | |
- } | |
- | |
- // Update I/O volume. | |
- total_io_volume += io_volume; | |
- | |
- // Print summary. | |
- long double compute_plcp_bitvector_time = utils::wclock() - compute_plcp_bitvector_start; | |
- fprintf(stderr, "Summary: time = %.2Lfs, total I/O vol = %.2Lfn\n\n", | |
- compute_plcp_bitvector_time, (1.L * io_volume) / text_length); | |
- | |
- // Return pointer to B. | |
- return B; | |
-} | |
- | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-void compute_plcp_bitvector(std::uint64_t text_length, std::uint64_t ram_use, | |
- std::string text_filename, std::string sa_filename, std::string bwt_filename, | |
- std::string output_filename, std::uint64_t &n_irreducible_lcps, | |
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) { | |
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use; | |
- if (text_to_ram_ratio > 4.0L) { | |
- // Not enough RAM to hold B in RAM. | |
- compute_plcp_bitvector_small_ram<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename, | |
- sa_filename, bwt_filename, output_filename, n_irreducible_lcps, sum_irreducible_lcps, total_io_volume); | |
- } else { | |
- // Enough RAM to hold B in RAM. | |
- std::uint64_t *B = compute_plcp_bitvector_large_ram<text_offset_type, ext_text_offset_type>(text_length, | |
- ram_use, text_filename, sa_filename, bwt_filename, output_filename, n_irreducible_lcps, | |
- sum_irreducible_lcps, total_io_volume); | |
- | |
- // Write B to disk. | |
- { | |
- // Start the timer. | |
- fprintf(stderr, "Write PLCP bitvector to disk: "); | |
- long double write_plcp_start = utils::wclock(); | |
- std::uint64_t io_volume = 0; | |
- | |
- // Write the data. | |
- std::uint64_t length_of_B_in_words = (2UL * text_length + 63) / 64; | |
- utils::write_to_file(B, length_of_B_in_words, output_filename); | |
- | |
- // Update I/O volume. | |
- io_volume += length_of_B_in_words * sizeof(std::uint64_t); | |
- total_io_volume += io_volume; | |
- | |
- // Print summary. | |
- long double write_plcp_time = utils::wclock() - write_plcp_start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, I/O vol = %.2Lfn\n\n", write_plcp_time, | |
- ((1.L * io_volume) / (1L << 20)) / write_plcp_time, (1.L * io_volume) / text_length); | |
- } | |
- delete[] B; | |
- } | |
-} | |
- | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-void compute_plcp_bitvector(std::string text_filename, std::string sa_filename, | |
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) { | |
- utils::drop_disk_pages(text_filename); | |
- utils::drop_disk_pages(sa_filename); | |
- utils::drop_disk_pages(bwt_filename); | |
- srand(time(0) + getpid()); | |
- long double global_start = utils::wclock(); | |
- std::uint64_t total_io_volume = 0; | |
- | |
- // Compute basic parameters. | |
- std::uint64_t text_length = utils::file_size(text_filename); | |
- std::uint64_t n_irreducible_lcps = 0; | |
- std::uint64_t sum_irreducible_lcps = 0; | |
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use; | |
- | |
- if (text_length == 0) { | |
- fprintf(stderr, "Error: the input file is empty!\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Turn paths absolute. | |
- text_filename = utils::absolute_path(text_filename); | |
- sa_filename = utils::absolute_path(sa_filename); | |
- bwt_filename = utils::absolute_path(bwt_filename); | |
- output_filename = utils::absolute_path(output_filename); | |
- | |
- // Print summary of basic parameters. | |
- fprintf(stderr, "Text filename = %s\n", text_filename.c_str()); | |
- fprintf(stderr, "SA filename = %s\n", sa_filename.c_str()); | |
- fprintf(stderr, "BWT filename = %s\n", bwt_filename.c_str()); | |
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str()); | |
- fprintf(stderr, "Text length = %lu (%.2LfMiB)\n", text_length, 1.L * text_length / (1 << 20)); | |
- fprintf(stderr, "Text size / ram_use = %.2Lf\n", text_to_ram_ratio); | |
- fprintf(stderr, "RAM use = %lu (%.2LfMiB)\n", ram_use, ram_use / (1024.L * 1024)); | |
- fprintf(stderr, "sizeof(text_offset_type) = %lu\n", sizeof(text_offset_type)); | |
- fprintf(stderr, "sizeof(ext_text_offset_type) = %lu\n", sizeof(ext_text_offset_type)); | |
-#ifdef _OPENMP | |
- fprintf(stderr, "Max number of threads = %d\n", omp_get_max_threads()); | |
-#endif | |
- fprintf(stderr, "\n"); | |
- | |
- compute_plcp_bitvector<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename, | |
- sa_filename, bwt_filename, output_filename, n_irreducible_lcps, | |
- sum_irreducible_lcps, total_io_volume); | |
- | |
- // Print summary. | |
- long double total_time = utils::wclock() - global_start; | |
- fprintf(stderr, "\n\nComputation finished. Summary:\n"); | |
- fprintf(stderr, " elapsed time = %.2Lfs (%.3Lfs/MiB of text)\n", total_time, total_time / (1.L * text_length / (1L << 20))); | |
- fprintf(stderr, " speed = %.2LfMiB of text/s\n", (1.L * text_length / (1L << 20)) / total_time); | |
- fprintf(stderr, " I/O volume = %lu (%.2Lfbytes/input symbol)\n", total_io_volume, (1.L * total_io_volume) / text_length); | |
- fprintf(stderr, " number of irreducible LCPs = %lu\n", n_irreducible_lcps); | |
- fprintf(stderr, " sum of irreducible LCPs = %lu\n", sum_irreducible_lcps); | |
-} | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_PLCP_BITVECTOR_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp | |
deleted file mode 100644 | |
index f872cb05..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp | |
+++ /dev/null | |
@@ -1,523 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_DISTRIBUTE_PAIRS_AND_COMPUTE_C_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_DISTRIBUTE_PAIRS_AND_COMPUTE_C_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <string> | |
-#include <algorithm> | |
-#include <omp.h> | |
- | |
-#include "io/async_stream_reader.hpp" | |
-#include "io/async_multi_stream_writer.hpp" | |
-#include "set_bits.hpp" | |
-#include "utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename text_offset_type> | |
-std::uint64_t distribute_pairs(std::uint64_t text_length, std::uint64_t max_halfsegment_size, | |
- std::uint64_t ram_use, std::string sa_filename, std::string bwt_filename, std::string **pairs_filenames, | |
- std::uint64_t &n_irreducible_lcps, std::uint64_t &total_io_volume) { | |
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size; | |
- std::uint64_t n_irreducible = 0; | |
- std::uint64_t phi_undefined_position = 0; | |
- | |
- fprintf(stderr, " Distribute irreducible (i, Phi[i]) pairs: "); | |
- long double start = utils::wclock(); | |
- | |
- // Create a map from used halfsegment pairs to a contiguous | |
- // range of integers. This is needed to use multifile writer. | |
- std::uint64_t **halfseg_ids_to_file_id = new std::uint64_t*[n_halfsegments]; | |
- { | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) | |
- halfseg_ids_to_file_id[i] = new std::uint64_t[n_halfsegments]; | |
- | |
- std::uint64_t file_counter = 0; | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) { | |
- for (std::uint64_t j = i; j < n_halfsegments; ++j) { | |
- halfseg_ids_to_file_id[i][j] = file_counter; | |
- halfseg_ids_to_file_id[j][i] = file_counter; | |
- ++file_counter; | |
- } | |
- } | |
- } | |
- | |
- // Initialize multifile writer of (i, Phi[i]) pairs. | |
- static const std::uint64_t n_free_buffers = 4; | |
- std::uint64_t halfseg_buffers_ram = ram_use; | |
- std::uint64_t n_different_halfseg_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2; | |
- std::uint64_t buffer_size = std::max(1UL, halfseg_buffers_ram / (n_different_halfseg_pairs + n_free_buffers)); | |
- typedef async_multi_stream_writer<text_offset_type> pair_multiwriter_type; | |
- pair_multiwriter_type *pair_multiwriter = new pair_multiwriter_type(buffer_size, n_free_buffers); | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) | |
- for (std::uint64_t j = i; j < n_halfsegments; ++j) | |
- pair_multiwriter->add_file(pairs_filenames[i][j]); | |
- | |
- // Initialize suffix array reader. | |
- typedef async_stream_reader<text_offset_type> sa_reader_type; | |
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename); | |
- | |
- // Initialize BWT reader. | |
- typedef async_stream_reader<std::uint8_t> bwt_reader_type; | |
- bwt_reader_type *bwt_reader = new bwt_reader_type(bwt_filename); | |
- | |
- // Distribution follows. | |
- std::uint8_t prev_bwt = 0; | |
- std::uint64_t prev_sa = 0; | |
- std::uint64_t prev_halfseg_id = 0; | |
- for (std::uint64_t i = 0; i < text_length; ++i) { | |
- std::uint64_t cur_sa = sa_reader->read(); | |
- std::uint64_t cur_halfseg_id = cur_sa / max_halfsegment_size; | |
- std::uint8_t cur_bwt = bwt_reader->read(); | |
- | |
- if (i == 0 || cur_sa == 0 || prev_sa == 0 || cur_bwt != prev_bwt) { | |
- // PLCP[cur_sa] is irreducible. Write (i, Phi[i]) to appropriate file. | |
- ++n_irreducible; | |
- if (i > 0) { | |
- std::uint64_t file_id = halfseg_ids_to_file_id[cur_halfseg_id][prev_halfseg_id]; | |
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)cur_sa); | |
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)prev_sa); | |
- } else phi_undefined_position = cur_sa; | |
- } | |
- | |
- prev_halfseg_id = cur_halfseg_id; | |
- prev_sa = cur_sa; | |
- prev_bwt = cur_bwt; | |
- } | |
- | |
- // Print summary. | |
- long double elapsed = utils::wclock() - start; | |
- std::uint64_t io_volume = sa_reader->bytes_read() + bwt_reader->bytes_read() + pair_multiwriter->bytes_written(); | |
- total_io_volume += io_volume; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", | |
- elapsed, ((1.L * io_volume) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length); | |
- | |
- // Clean up. | |
- delete bwt_reader; | |
- delete sa_reader; | |
- delete pair_multiwriter; | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) | |
- delete[] halfseg_ids_to_file_id[i]; | |
- delete[] halfseg_ids_to_file_id; | |
- | |
- // Return undefined Phi position. | |
- n_irreducible_lcps = n_irreducible; | |
- return phi_undefined_position; | |
-} | |
- | |
-template<typename text_offset_type> | |
-void compute_C(std::uint64_t text_length, std::uint64_t max_halfsegment_size, std::uint64_t ram_use, | |
- std::uint64_t phi_undefined_position, std::string **pairs_filenames, std::string sa_filename, | |
- std::string bwt_filename, std::string C_filename, std::uint64_t &total_io_volume) { | |
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size; | |
- std::uint64_t max_block_size = 8UL * ram_use; | |
- while (max_block_size & 63UL) | |
- ++max_block_size; | |
- | |
- std::uint64_t n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- std::uint64_t io_vol_scan_sa = (1 + sizeof(text_offset_type)) * text_length * n_blocks; | |
- | |
- std::uint64_t io_vol_scan_pairs = 0; | |
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) { | |
- std::uint64_t block_beg = block_id * max_block_size; | |
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length); | |
- for (std::uint64_t left_halfseg_id = 0; left_halfseg_id < n_halfsegments; ++left_halfseg_id) { | |
- std::uint64_t left_halfseg_beg = left_halfseg_id * max_halfsegment_size; | |
- std::uint64_t left_halfseg_end = std::min(left_halfseg_beg + max_halfsegment_size, text_length); | |
- for (std::uint64_t right_halfseg_id = left_halfseg_id; right_halfseg_id < n_halfsegments; ++right_halfseg_id) { | |
- std::uint64_t right_halfseg_beg = right_halfseg_id * max_halfsegment_size; | |
- std::uint64_t right_halfseg_end = std::min(right_halfseg_beg + max_halfsegment_size, text_length); | |
- if ((left_halfseg_end > block_beg && block_end > left_halfseg_beg) || | |
- (right_halfseg_end > block_beg && block_end > right_halfseg_beg)) | |
- io_vol_scan_pairs += utils::file_size(pairs_filenames[left_halfseg_id][right_halfseg_id]); | |
- } | |
- } | |
- } | |
- | |
- if (io_vol_scan_sa <= io_vol_scan_pairs) { | |
- fprintf(stderr, " Compute bitvector C (method I): "); | |
- long double start = utils::wclock(); | |
- std::uint64_t io_vol = 0; | |
- | |
- // Allocate the array holding the block of C. | |
- std::uint64_t max_block_size_in_words = max_block_size / 64; | |
- std::uint64_t *C = new std::uint64_t[max_block_size_in_words]; | |
- std::FILE *f = utils::file_open(C_filename, "w"); | |
- | |
- // Initialize the buffer. | |
- static const std::uint64_t buffer_size = (1UL << 20); | |
- std::uint64_t *buf = new std::uint64_t[buffer_size]; | |
-#ifdef _OPENMP | |
- std::uint64_t *tempbuf = new std::uint64_t[buffer_size]; | |
-#endif | |
- | |
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) { | |
- std::uint64_t block_beg = block_id * max_block_size; | |
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length); | |
- std::uint64_t block_size = block_end - block_beg; | |
- std::uint64_t block_size_in_words = (block_size + 63) / 64; | |
- | |
- // Zero-initialize the block of C. | |
- std::fill(C, C + block_size_in_words, 0UL); | |
- | |
- // Initialize suffix array reader. | |
- typedef async_stream_reader<text_offset_type> sa_reader_type; | |
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename); | |
- | |
- // Initialize BWT reader. | |
- typedef async_stream_reader<std::uint8_t> bwt_reader_type; | |
- bwt_reader_type *bwt_reader = new bwt_reader_type(bwt_filename); | |
- | |
- // Scan SA and BWT left to right. | |
- std::uint64_t filled = 0; | |
- std::uint8_t prev_bwt = 0; | |
- std::uint64_t prev_sa = 0; | |
- for (std::uint64_t i = 0; i < text_length; ++i) { | |
- std::uint64_t cur_sa = sa_reader->read(); | |
- std::uint8_t cur_bwt = bwt_reader->read(); | |
- | |
- if (block_beg <= cur_sa && cur_sa < block_end && | |
- (i == 0 || cur_sa == 0 || prev_sa == 0 || cur_bwt != prev_bwt)) { | |
- // PLCP[cur_sa] is irreducible. | |
- std::uint64_t offset = cur_sa - block_beg; | |
- buf[filled++] = offset; | |
- if (filled == buffer_size) { | |
-#ifdef _OPENMP | |
- set_bits(C, block_size, buf, filled, tempbuf); | |
-#else | |
- set_bits(C, buf, filled); | |
-#endif | |
- filled = 0; | |
- } | |
- } | |
- | |
- prev_sa = cur_sa; | |
- prev_bwt = cur_bwt; | |
- } | |
- | |
- // Flush the remaining items in the buffer. | |
- if (filled > 0) { | |
-#ifdef _OPENMP | |
- set_bits(C, block_size, buf, filled, tempbuf); | |
-#else | |
- set_bits(C, buf, filled); | |
-#endif | |
- filled = 0; | |
- } | |
- | |
- // Write current block of C to file. | |
- utils::write_to_file(C, block_size_in_words, f); | |
- | |
- // Update I/O volume. | |
- io_vol += sa_reader->bytes_read() + bwt_reader->bytes_read() + block_size_in_words * sizeof(std::uint64_t); | |
- | |
- // Clean up. | |
- delete sa_reader; | |
- delete bwt_reader; | |
- } | |
- | |
- // Clean up. | |
-#ifdef _OPENMP | |
- delete[] tempbuf; | |
-#endif | |
- delete[] buf; | |
- delete[] C; | |
- std::fclose(f); | |
- | |
- // Update I/O volume. | |
- total_io_volume += io_vol; | |
- | |
- // Print summary. | |
- long double elapsed = utils::wclock() - start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", elapsed, | |
- ((1.L * io_vol) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length); | |
- } else { | |
- fprintf(stderr, " Compute bitvector C (method II): "); | |
- long double start = utils::wclock(); | |
- std::uint64_t io_vol = 0; | |
- | |
- // Allocate the array holding the block of C. | |
- std::uint64_t max_block_size_in_words = max_block_size / 64; | |
- std::uint64_t *C = new std::uint64_t[max_block_size_in_words]; | |
- std::FILE *f = utils::file_open(C_filename, "w"); | |
- | |
- // Initialize the buffer. | |
- static const std::uint64_t buffer_size = (1UL << 20); | |
- std::uint64_t *buf = new std::uint64_t[buffer_size]; | |
-#ifdef _OPENMP | |
- std::uint64_t *tempbuf = new std::uint64_t[buffer_size]; | |
-#endif | |
- | |
- // Process blocks of C left to right. | |
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) { | |
- std::uint64_t block_beg = block_id * max_block_size; | |
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length); | |
- std::uint64_t block_size = block_end - block_beg; | |
- std::uint64_t block_size_in_words = (block_size + 63) / 64; | |
- | |
- // Zero-initialize the block of C. | |
- std::fill(C, C + block_size_in_words, 0UL); | |
- | |
- // Iterate through all pairs of halfsegments. | |
- std::uint64_t filled = 0; | |
- for (std::uint64_t left_halfseg_id = 0; left_halfseg_id < n_halfsegments; ++left_halfseg_id) { | |
- std::uint64_t left_halfseg_beg = left_halfseg_id * max_halfsegment_size; | |
- std::uint64_t left_halfseg_end = std::min(left_halfseg_beg + max_halfsegment_size, text_length); | |
- | |
- for (std::uint64_t right_halfseg_id = left_halfseg_id; right_halfseg_id < n_halfsegments; ++right_halfseg_id) { | |
- std::uint64_t right_halfseg_beg = right_halfseg_id * max_halfsegment_size; | |
- std::uint64_t right_halfseg_end = std::min(right_halfseg_beg + max_halfsegment_size, text_length); | |
- | |
- if ((left_halfseg_end > block_beg && block_end > left_halfseg_beg) || | |
- (right_halfseg_end > block_beg && block_end > right_halfseg_beg)) { | |
- // Initialize reading of pairs. | |
- typedef async_stream_reader<text_offset_type> pair_reader_type; | |
- pair_reader_type *pair_reader = new pair_reader_type(pairs_filenames[left_halfseg_id][right_halfseg_id]); | |
- | |
- while (pair_reader->empty() == false) { | |
- std::uint64_t i = pair_reader->read(); | |
- pair_reader->read(); // Skip Phi[i]. | |
- | |
- if (block_beg <= i && i < block_end) { | |
- std::uint64_t offset = i - block_beg; | |
- buf[filled++] = offset; | |
- if (filled == buffer_size) { | |
-#ifdef _OPENMP | |
- set_bits(C, block_size, buf, filled, tempbuf); | |
-#else | |
- set_bits(C, buf, filled); | |
-#endif | |
- filled = 0; | |
- } | |
- } | |
- } | |
- | |
- // Update I/O volume. | |
- io_vol += pair_reader->bytes_read(); | |
- | |
- // Clean up. | |
- delete pair_reader; | |
- } | |
- } | |
- } | |
- | |
- // Flush the remaining items in the buffer. | |
- if (filled > 0) { | |
-#ifdef _OPENMP | |
- set_bits(C, block_size, buf, filled, tempbuf); | |
-#else | |
- set_bits(C, buf, filled); | |
-#endif | |
- filled = 0; | |
- } | |
- | |
- // Special case. | |
- if (block_beg <= phi_undefined_position && phi_undefined_position < block_end) { | |
- std::uint64_t offset = phi_undefined_position - block_beg; | |
- C[offset >> 6] |= (1UL << (offset & 63)); | |
- } | |
- | |
- // Write current block of C to file. | |
- utils::write_to_file(C, block_size_in_words, f); | |
- | |
- // Update I/O volume. | |
- io_vol += block_size_in_words * sizeof(std::uint64_t); | |
- } | |
- | |
- // Clean up. | |
-#ifdef _OPENMP | |
- delete[] tempbuf; | |
-#endif | |
- delete[] buf; | |
- delete[] C; | |
- std::fclose(f); | |
- | |
- | |
- // Update I/O volume. | |
- total_io_volume += io_vol; | |
- | |
- // Print summary. | |
- long double elapsed = utils::wclock() - start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", elapsed, | |
- ((1.L * io_vol) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length); | |
- } | |
-} | |
- | |
-template<typename text_offset_type> | |
-std::uint64_t distribute_pairs_and_compute_C(std::uint64_t text_length, | |
- std::uint64_t max_halfsegment_size, std::uint64_t ram_use, std::string sa_filename, | |
- std::string bwt_filename, std::string C_filename, std::string **pairs_filenames, | |
- std::uint64_t &n_irreducible_lcps, std::uint64_t &total_io_volume) { | |
- fprintf(stderr, " Distribute irreducible (i, Phi[i]) pairs and compute bitvector C: "); | |
- long double start = utils::wclock(); | |
- | |
- // Initialize basic parameters. | |
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size; | |
- std::uint64_t n_different_halfseg_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2; | |
- std::uint64_t io_volume = 0; | |
- std::uint64_t n_irreducible = 0; | |
- std::uint64_t phi_undefined_position = 0; | |
- | |
- // Allocate bitvector C. | |
- std::uint64_t C_size_in_words = (text_length + 63) / 64; | |
- std::uint64_t C_size_in_bytes = (text_length + 7) / 8; | |
- std::uint64_t *C = new std::uint64_t[C_size_in_words]; | |
- std::fill(C, C + C_size_in_words, 0UL); | |
- | |
- // Create a map from used halfsegment pairs to a contiguous | |
- // range of integers. This is needed to use multifile writer. | |
- std::uint64_t **halfseg_ids_to_file_id = new std::uint64_t*[n_halfsegments]; | |
- { | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) | |
- halfseg_ids_to_file_id[i] = new std::uint64_t[n_halfsegments]; | |
- | |
- std::uint64_t file_counter = 0; | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) { | |
- for (std::uint64_t j = i; j < n_halfsegments; ++j) { | |
- halfseg_ids_to_file_id[i][j] = file_counter; | |
- halfseg_ids_to_file_id[j][i] = file_counter; | |
- ++file_counter; | |
- } | |
- } | |
- } | |
- | |
- // Initialize multifile writer of (i, Phi[i]) pairs. | |
- static const std::uint64_t n_free_buffers = 4; | |
- std::uint64_t halfseg_buffers_ram = ram_use - C_size_in_bytes; | |
- std::uint64_t buffer_size = std::max((1UL << 20), halfseg_buffers_ram / (n_different_halfseg_pairs + n_free_buffers)); | |
- typedef async_multi_stream_writer<text_offset_type> pair_multiwriter_type; | |
- pair_multiwriter_type *pair_multiwriter = new pair_multiwriter_type(buffer_size, n_free_buffers); | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) | |
- for (std::uint64_t j = i; j < n_halfsegments; ++j) | |
- pair_multiwriter->add_file(pairs_filenames[i][j]); | |
- | |
- // Initialize suffix array reader. | |
- typedef async_stream_reader<text_offset_type> sa_reader_type; | |
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename); | |
- | |
- // Initialize BWT reader. | |
- typedef async_stream_reader<std::uint8_t> bwt_reader_type; | |
- bwt_reader_type *bwt_reader = new bwt_reader_type(bwt_filename); | |
- | |
- // Initialize the buffer. | |
- static const std::uint64_t local_buffer_size = (1UL << 20); | |
- std::uint64_t *buf = new std::uint64_t[local_buffer_size]; | |
-#ifdef _OPENMP | |
- std::uint64_t *tempbuf = new std::uint64_t[local_buffer_size]; | |
-#endif | |
- | |
- // Distribution follows. | |
- std::uint64_t filled = 0; | |
- std::uint8_t prev_bwt = 0; | |
- std::uint64_t prev_sa = 0; | |
- std::uint64_t prev_halfseg_id = 0; | |
- for (std::uint64_t i = 0; i < text_length; ++i) { | |
- std::uint64_t cur_sa = sa_reader->read(); | |
- std::uint64_t cur_halfseg_id = cur_sa / max_halfsegment_size; | |
- std::uint8_t cur_bwt = bwt_reader->read(); | |
- | |
- if (i == 0 || cur_sa == 0 || prev_sa == 0 || cur_bwt != prev_bwt) { | |
- // PLCP[cur_sa] is irreducible. Write (i, Phi[i]) to appropriate file. | |
- ++n_irreducible; | |
- buf[filled++] = cur_sa; | |
- if (filled == local_buffer_size) { | |
-#ifdef _OPENMP | |
- set_bits(C, text_length, buf, filled, tempbuf); | |
-#else | |
- set_bits(C, buf, filled); | |
-#endif | |
- filled = 0; | |
- } | |
- | |
- if (i > 0) { | |
- std::uint64_t file_id = halfseg_ids_to_file_id[cur_halfseg_id][prev_halfseg_id]; | |
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)cur_sa); | |
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)prev_sa); | |
- } else phi_undefined_position = cur_sa; | |
- } | |
- | |
- prev_halfseg_id = cur_halfseg_id; | |
- prev_sa = cur_sa; | |
- prev_bwt = cur_bwt; | |
- } | |
- | |
- // Flush the remaining items in the buffer. | |
- if (filled > 0) { | |
-#ifdef _OPENMP | |
- set_bits(C, text_length, buf, filled, tempbuf); | |
-#else | |
- set_bits(C, buf, filled); | |
-#endif | |
- filled = 0; | |
- } | |
- | |
- // Write C to disk. | |
- utils::write_to_file(C, C_size_in_words, C_filename); | |
- | |
- // Update I/O volume. | |
- io_volume += sa_reader->bytes_read() + bwt_reader->bytes_read() + | |
- pair_multiwriter->bytes_written() + C_size_in_words * sizeof(std::uint64_t); | |
- total_io_volume += io_volume; | |
- | |
- // Clean up. | |
-#ifdef _OPENMP | |
- delete[] tempbuf; | |
-#endif | |
- delete[] buf; | |
- delete[] C; | |
- delete bwt_reader; | |
- delete sa_reader; | |
- delete pair_multiwriter; | |
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) | |
- delete[] halfseg_ids_to_file_id[i]; | |
- delete[] halfseg_ids_to_file_id; | |
- | |
- // Print summary. | |
- long double elapsed = utils::wclock() - start; | |
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", | |
- elapsed, ((1.L * io_volume) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length); | |
- | |
- // Update reference variables. | |
- n_irreducible_lcps = n_irreducible; | |
- | |
- // Return undefined Phi position. | |
- return phi_undefined_position; | |
-} | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_DISTRIBUTE_PAIRS_AND_COMPUTE_C_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multi_stream_writer.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multi_stream_writer.hpp | |
deleted file mode 100644 | |
index 91981e83..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multi_stream_writer.hpp | |
+++ /dev/null | |
@@ -1,291 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/io/async_multi_stream_writer.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <vector> | |
-#include <queue> | |
-#include <string> | |
-#include <algorithm> | |
-#include <condition_variable> | |
-#include <mutex> | |
-#include <thread> | |
- | |
-#include "../utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename value_type> | |
-class async_multi_stream_writer { | |
- private: | |
- template<typename T> | |
- struct buffer { | |
- buffer(std::uint64_t size) { | |
- m_size = size; | |
- m_content = (T *)malloc(m_size * sizeof(T)); | |
- m_filled = 0; | |
- } | |
- | |
- void write_to_file(std::FILE *f) { | |
- utils::write_to_file(m_content, m_filled, f); | |
- m_filled = 0; | |
- } | |
- | |
- ~buffer() { | |
- free(m_content); | |
- } | |
- | |
- inline bool empty() const { return m_filled == 0; } | |
- inline bool full() const { return m_filled == m_size; } | |
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; } | |
- | |
- T *m_content; | |
- std::uint64_t m_size; | |
- std::uint64_t m_filled; | |
- }; | |
- | |
- template<typename buffer_type> | |
- struct request { | |
- request(buffer_type *buffer, std::uint64_t file_id) { | |
- m_buffer = buffer; | |
- m_file_id = file_id; | |
- } | |
- | |
- buffer_type *m_buffer; | |
- std::uint64_t m_file_id; | |
- }; | |
- | |
- template<typename request_type> | |
- struct request_queue { | |
- request_queue() | |
- : m_no_more_requests(false) {} | |
- | |
- request_type get() { | |
- request_type ret = m_requests.front(); | |
- m_requests.pop(); | |
- return ret; | |
- } | |
- | |
- inline void add(request_type request) { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_requests.push(request); | |
- } | |
- | |
- inline bool empty() const { return m_requests.empty(); } | |
- | |
- std::queue<request_type> m_requests; // Must have FIFO property | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- bool m_no_more_requests; | |
- }; | |
- | |
- template<typename buffer_type> | |
- struct buffer_collection { | |
- // Separate method to allow locking. | |
- inline void add(buffer_type *buffer) { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_buffers.push_back(buffer); | |
- } | |
- | |
- buffer_type* get() { | |
- buffer_type *ret = m_buffers.back(); | |
- m_buffers.pop_back(); | |
- return ret; | |
- } | |
- | |
- inline bool empty() const { return m_buffers.empty(); } | |
- | |
- std::vector<buffer_type*> m_buffers; | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- }; | |
- | |
- private: | |
- template<typename T> | |
- static void async_io_thread_code(async_multi_stream_writer<T> *caller) { | |
- typedef buffer<T> buffer_type; | |
- typedef request<buffer_type> request_type; | |
- while (true) { | |
- // Wait for request or until 'no more requests' flag is set. | |
- std::unique_lock<std::mutex> lk(caller->m_write_requests.m_mutex); | |
- while (caller->m_write_requests.empty() && | |
- !(caller->m_write_requests.m_no_more_requests)) | |
- caller->m_write_requests.m_cv.wait(lk); | |
- | |
- if (caller->m_write_requests.empty() && | |
- caller->m_write_requests.m_no_more_requests) { | |
- // No more requests -- exit. | |
- lk.unlock(); | |
- break; | |
- } | |
- | |
- // Extract the buffer from the collection. | |
- request_type request = caller->m_write_requests.get(); | |
- lk.unlock(); | |
- | |
- // Write the data to disk. | |
- request.m_buffer->write_to_file(caller->m_files[request.m_file_id]); | |
- | |
- // Add the (now empty) buffer to the collection | |
- // of empty buffers and notify the waiting thread. | |
- caller->m_free_buffers.add(request.m_buffer); | |
- caller->m_free_buffers.m_cv.notify_one(); | |
- } | |
- } | |
- | |
- private: | |
- typedef buffer<value_type> buffer_type; | |
- typedef request<buffer_type> request_type; | |
- | |
- std::uint64_t m_bytes_written; | |
- std::uint64_t m_items_per_buf; | |
- | |
- std::vector<std::FILE*> m_files; | |
- std::vector<buffer_type*> m_buffers; | |
- buffer_collection<buffer_type> m_free_buffers; | |
- request_queue<request_type> m_write_requests; | |
- std::thread *m_io_thread; | |
- | |
- // Issue a request to write to buffer. | |
- void issue_write_request(std::uint64_t file_id) { | |
- request_type req(m_buffers[file_id], file_id); | |
- m_buffers[file_id] = NULL; | |
- m_write_requests.add(req); | |
- m_write_requests.m_cv.notify_one(); | |
- } | |
- | |
- // Get a free buffer from the collection of free buffers. | |
- buffer_type* get_free_buffer() { | |
- std::unique_lock<std::mutex> lk(m_free_buffers.m_mutex); | |
- while (m_free_buffers.empty()) | |
- m_free_buffers.m_cv.wait(lk); | |
- buffer_type *ret = m_free_buffers.get(); | |
- lk.unlock(); | |
- return ret; | |
- } | |
- | |
- public: | |
- async_multi_stream_writer(std::uint64_t bufsize_per_file_in_bytes = (1UL << 20), | |
- std::uint64_t n_free_buffers = 4UL) { | |
- // Initialize basic parameters. | |
- // Works even with n_free_buffers == 0. | |
- m_bytes_written = 0; | |
- m_items_per_buf = std::max(1UL, bufsize_per_file_in_bytes / sizeof(value_type)); | |
- | |
- // Initialize empty buffers. | |
- for (std::uint64_t j = 0; j < n_free_buffers; ++j) | |
- m_free_buffers.add(new buffer_type(m_items_per_buf)); | |
- | |
- // Start the I/O thread. | |
- m_io_thread = new std::thread(async_io_thread_code<value_type>, this); | |
- } | |
- | |
- // The added file gets the next available ID (starting from 0). | |
- void add_file(std::string filename, std::string write_mode = | |
- std::string("w")) { | |
- m_buffers.push_back(new buffer_type(m_items_per_buf)); | |
- m_files.push_back(utils::file_open_nobuf(filename, write_mode)); | |
- } | |
- | |
- // Write value to i-th file. | |
- void write_to_ith_file(std::uint64_t i, value_type value) { | |
- m_bytes_written += sizeof(value_type); | |
- m_buffers[i]->m_content[m_buffers[i]->m_filled++] = value; | |
- if (m_buffers[i]->full()) { | |
- issue_write_request(i); | |
- m_buffers[i] = get_free_buffer(); | |
- } | |
- } | |
- | |
- // Write values[0..length) to i-th file. | |
- void write_to_ith_file(std::uint64_t i, const value_type *values, std::uint64_t length) { | |
- m_bytes_written += length * sizeof(value_type); | |
- while (length > 0) { | |
- std::uint64_t towrite = std::min(length, m_items_per_buf - m_buffers[i]->m_filled); | |
- std::copy(values, values + towrite, m_buffers[i]->m_content + m_buffers[i]->m_filled); | |
- m_buffers[i]->m_filled += towrite; | |
- length -= towrite; | |
- values += towrite; | |
- if (m_buffers[i]->full()) { | |
- issue_write_request(i); | |
- m_buffers[i] = get_free_buffer(); | |
- } | |
- } | |
- } | |
- | |
- // Return performed I/O in bytes. | |
- inline std::uint64_t bytes_written() const { | |
- return m_bytes_written; | |
- } | |
- | |
- // Destructor. | |
- ~async_multi_stream_writer() { | |
- // Flush all buffers. | |
- std::uint64_t n_buffers = m_buffers.size(); | |
- for (std::uint64_t file_id = 0; file_id < n_buffers; ++file_id) { | |
- if (!(m_buffers[file_id]->empty())) | |
- issue_write_request(file_id); | |
- } | |
- | |
- // Let the I/O thread know that there | |
- // won't be any more requests. | |
- std::unique_lock<std::mutex> lk(m_write_requests.m_mutex); | |
- m_write_requests.m_no_more_requests = true; | |
- lk.unlock(); | |
- m_write_requests.m_cv.notify_one(); | |
- | |
- // Wait for the I/O thread to finish. | |
- m_io_thread->join(); | |
- delete m_io_thread; | |
- | |
- // Delete buffers and close files. | |
- for (std::uint64_t file_id = 0; file_id < n_buffers; ++file_id) { | |
- delete m_buffers[file_id]; // Can be NULL | |
- std::fclose(m_files[file_id]); | |
- } | |
- | |
- // Delete free buffers. | |
- while (!(m_free_buffers.empty())) { | |
- buffer_type *buf = m_free_buffers.get(); | |
- delete buf; | |
- } | |
- } | |
-}; | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_file_writer.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_file_writer.hpp | |
deleted file mode 100644 | |
index 726e6193..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_file_writer.hpp | |
+++ /dev/null | |
@@ -1,281 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/io/async_multipart_file_writer.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_FILE_WRITER_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_FILE_WRITER_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <queue> | |
-#include <string> | |
-#include <algorithm> | |
-#include <condition_variable> | |
-#include <mutex> | |
-#include <thread> | |
- | |
-#include "../utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename value_type> | |
-class async_multipart_file_writer { | |
- private: | |
- template<typename T> | |
- struct buffer { | |
- buffer(std::uint64_t size) { | |
- m_size = size; | |
- m_content = (T *)malloc(m_size * sizeof(T)); | |
- m_filled = 0; | |
- } | |
- | |
- ~buffer() { | |
- free(m_content); | |
- } | |
- | |
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; } | |
- inline std::uint64_t free_space() const { return m_size - m_filled; } | |
- | |
- inline bool empty() const { return m_filled == 0; } | |
- inline bool full() const { return m_filled == m_size; } | |
- | |
- T *m_content; | |
- std::uint64_t m_size; | |
- std::uint64_t m_filled; | |
- }; | |
- | |
- template<typename buffer_type> | |
- struct buffer_queue { | |
- buffer_queue(std::uint64_t n_buffers = 0, std::uint64_t items_per_buf = 0) { | |
- m_signal_stop = false; | |
- for (std::uint64_t i = 0; i < n_buffers; ++i) | |
- m_queue.push(new buffer_type(items_per_buf)); | |
- } | |
- | |
- ~buffer_queue() { | |
- while (!m_queue.empty()) { | |
- buffer_type *buf = m_queue.front(); | |
- m_queue.pop(); | |
- delete buf; | |
- } | |
- } | |
- | |
- buffer_type *pop() { | |
- buffer_type *ret = m_queue.front(); | |
- m_queue.pop(); | |
- return ret; | |
- } | |
- | |
- void push(buffer_type *buf) { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_queue.push(buf); | |
- } | |
- | |
- void send_stop_signal() { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_signal_stop = true; | |
- } | |
- | |
- inline bool empty() const { return m_queue.empty(); } | |
- | |
- std::queue<buffer_type*> m_queue; // Must have FIFO property | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- bool m_signal_stop; | |
- }; | |
- | |
- private: | |
- typedef buffer<value_type> buffer_type; | |
- typedef buffer_queue<buffer_type> buffer_queue_type; | |
- | |
- buffer_queue_type *m_empty_buffers; | |
- buffer_queue_type *m_full_buffers; | |
- | |
- private: | |
- template<typename T> | |
- static void io_thread_code(async_multipart_file_writer<T> *caller) { | |
- typedef buffer<T> buffer_type; | |
- while (true) { | |
- // Wait for the full buffer (or a stop signal). | |
- std::unique_lock<std::mutex> lk(caller->m_full_buffers->m_mutex); | |
- while (caller->m_full_buffers->empty() && | |
- !(caller->m_full_buffers->m_signal_stop)) | |
- caller->m_full_buffers->m_cv.wait(lk); | |
- | |
- if (caller->m_full_buffers->empty()) { | |
- // We received the stop signal -- exit. | |
- lk.unlock(); | |
- break; | |
- } | |
- | |
- // Extract the buffer from the collection. | |
- buffer_type *buffer = caller->m_full_buffers->pop(); | |
- lk.unlock(); | |
- | |
- // Safely write the data to disk. | |
- const T *ptr = buffer->m_content; | |
- while (buffer->m_filled > 0) { | |
- if (caller->m_file == NULL || caller->m_cur_part_items_written == caller->m_single_part_max_items) { | |
- if (caller->m_file != NULL) { | |
- std::fclose(caller->m_file); | |
- ++caller->m_cur_part; | |
- } else caller->m_cur_part = 0; | |
- std::string cur_part_filename = caller->m_filename + | |
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part); | |
- caller->m_file = utils::file_open(cur_part_filename, "w"); | |
- caller->m_cur_part_items_written = 0; | |
- } | |
- | |
- std::uint64_t cur_part_items_left = caller->m_single_part_max_items - caller->m_cur_part_items_written; | |
- std::uint64_t towrite = std::min(cur_part_items_left, buffer->m_filled); | |
- utils::write_to_file(ptr, towrite, caller->m_file); | |
- caller->m_cur_part_items_written += towrite; | |
- buffer->m_filled -= towrite; | |
- ptr += towrite; | |
- } | |
- | |
- // Add the (now empty) buffer to the collection | |
- // of empty buffers and notify the waiting thread. | |
- caller->m_empty_buffers->push(buffer); | |
- caller->m_empty_buffers->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- // Get a free buffer from the collection of free buffers. | |
- buffer_type* get_empty_buffer() { | |
- std::unique_lock<std::mutex> lk(m_empty_buffers->m_mutex); | |
- while (m_empty_buffers->empty()) | |
- m_empty_buffers->m_cv.wait(lk); | |
- buffer_type *ret = m_empty_buffers->pop(); | |
- lk.unlock(); | |
- return ret; | |
- } | |
- | |
- private: | |
- std::FILE *m_file; | |
- std::string m_filename; | |
- | |
- std::uint64_t m_cur_part; | |
- std::uint64_t m_single_part_max_items; | |
- std::uint64_t m_cur_part_items_written; | |
- std::uint64_t m_bytes_written; | |
- std::uint64_t m_items_per_buf; | |
- | |
- buffer_type *m_cur_buffer; | |
- std::thread *m_io_thread; | |
- | |
- public: | |
- async_multipart_file_writer(std::string filename, | |
- std::uint64_t single_part_max_bytes, | |
- std::uint64_t total_buf_size_bytes = (8UL << 20), | |
- std::uint64_t n_buffers = 4UL) { | |
- m_filename = filename; | |
- | |
- // Initialize basic parameters. Note: if no items are | |
- // written, this class does not create any files. | |
- m_single_part_max_items = std::max(1UL, single_part_max_bytes / sizeof(value_type)); | |
- m_file = NULL; | |
- | |
- // Allocate buffers. | |
- std::uint64_t total_buf_size_items = total_buf_size_bytes / sizeof(value_type); | |
- m_items_per_buf = std::max(1UL, total_buf_size_items / n_buffers); | |
- m_empty_buffers = new buffer_queue_type(n_buffers, m_items_per_buf); | |
- m_full_buffers = new buffer_queue_type(); | |
- | |
- // Initialize empty buffer. | |
- m_cur_buffer = get_empty_buffer(); | |
- m_bytes_written = 0; | |
- | |
- // Start the I/O thread. | |
- m_io_thread = new std::thread(io_thread_code<value_type>, this); | |
- } | |
- | |
- ~async_multipart_file_writer() { | |
- // Send the last incomplete buffer for writing. | |
- if (!(m_cur_buffer->empty())) { | |
- m_full_buffers->push(m_cur_buffer); | |
- m_full_buffers->m_cv.notify_one(); | |
- m_cur_buffer = NULL; | |
- } | |
- | |
- // Let the I/O thread know that we're done. | |
- m_full_buffers->send_stop_signal(); | |
- m_full_buffers->m_cv.notify_one(); | |
- | |
- // Wait for the I/O thread to finish. | |
- m_io_thread->join(); | |
- | |
- // Clean up. | |
- delete m_empty_buffers; | |
- delete m_full_buffers; | |
- delete m_io_thread; | |
- if (m_file != NULL) | |
- std::fclose(m_file); | |
- if (m_cur_buffer != NULL) | |
- delete m_cur_buffer; | |
- } | |
- | |
- inline void write(value_type x) { | |
- m_bytes_written += sizeof(value_type); | |
- m_cur_buffer->m_content[m_cur_buffer->m_filled++] = x; | |
- if (m_cur_buffer->full()) { | |
- m_full_buffers->push(m_cur_buffer); | |
- m_full_buffers->m_cv.notify_one(); | |
- m_cur_buffer = get_empty_buffer(); | |
- } | |
- } | |
- | |
- inline void write(const value_type *values, std::uint64_t length) { | |
- m_bytes_written += length * sizeof(value_type); | |
- while (length > 0) { | |
- std::uint64_t tocopy = std::min(length, m_cur_buffer->free_space()); | |
- std::copy(values, values + tocopy, m_cur_buffer->m_content + m_cur_buffer->m_filled); | |
- m_cur_buffer->m_filled += tocopy; | |
- values += tocopy; | |
- length -= tocopy; | |
- if (m_cur_buffer->full()) { | |
- m_full_buffers->push(m_cur_buffer); | |
- m_full_buffers->m_cv.notify_one(); | |
- m_cur_buffer = get_empty_buffer(); | |
- } | |
- } | |
- } | |
- | |
- inline std::uint64_t bytes_written() const { | |
- return m_bytes_written; | |
- } | |
-}; | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_FILE_WRITER_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp | |
deleted file mode 100644 | |
index 55900d80..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp | |
+++ /dev/null | |
@@ -1,302 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_MULTIFILE_READER_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_MULTIFILE_READER_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <queue> | |
-#include <string> | |
-#include <algorithm> | |
-#include <condition_variable> | |
-#include <mutex> | |
-#include <thread> | |
- | |
-#include "../utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename value_type> | |
-class async_multipart_multifile_reader { | |
- private: | |
- template<typename T> | |
- struct buffer { | |
- buffer(std::uint64_t size) { | |
- m_size = size; | |
- m_content = (T *)malloc(m_size * sizeof(T)); | |
- m_filled = 0; | |
- m_is_filled = false; | |
- } | |
- | |
- void read_from_file(std::FILE *f) { | |
- m_filled = std::fread(m_content, sizeof(T), m_size, f); | |
- } | |
- | |
- ~buffer() { | |
- free(m_content); | |
- } | |
- | |
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; } | |
- inline bool empty() const { return m_filled == 0; } | |
- | |
- T *m_content; | |
- std::uint64_t m_filled; | |
- std::uint64_t m_size; | |
- bool m_is_filled; | |
- }; | |
- | |
- template<typename buffer_type> | |
- struct request { | |
- request(buffer_type *buffer, std::uint64_t file_id) { | |
- m_buffer = buffer; | |
- m_file_id = file_id; | |
- } | |
- | |
- buffer_type *m_buffer; | |
- std::uint64_t m_file_id; | |
- }; | |
- | |
- template<typename request_type> | |
- struct request_queue { | |
- request_queue() | |
- : m_no_more_requests(false) {} | |
- | |
- request_type get() { | |
- request_type ret = m_requests.front(); | |
- m_requests.pop(); | |
- return ret; | |
- } | |
- | |
- inline void add(request_type request) { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_requests.push(request); | |
- } | |
- | |
- inline bool empty() const { return m_requests.empty(); } | |
- | |
- std::queue<request_type> m_requests; | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- bool m_no_more_requests; | |
- }; | |
- | |
- private: | |
- template<typename T> | |
- static void async_io_thread_code(async_multipart_multifile_reader<T> *caller) { | |
- typedef buffer<T> buffer_type; | |
- typedef request<buffer_type> request_type; | |
- while (true) { | |
- // Wait for request or until 'no more requests' flag is set. | |
- std::unique_lock<std::mutex> lk(caller->m_read_requests.m_mutex); | |
- while (caller->m_read_requests.empty() && | |
- !(caller->m_read_requests.m_no_more_requests)) | |
- caller->m_read_requests.m_cv.wait(lk); | |
- | |
- if (caller->m_read_requests.empty() && | |
- caller->m_read_requests.m_no_more_requests) { | |
- // No more requests -- exit. | |
- lk.unlock(); | |
- break; | |
- } | |
- | |
- // Extract the buffer from the collection. | |
- request_type request = caller->m_read_requests.get(); | |
- lk.unlock(); | |
- | |
- // Process the request. | |
- if (caller->m_files[request.m_file_id] == NULL) { | |
- // Attempt to open and read from the file. | |
- std::string cur_part_filename = caller->m_filenames[request.m_file_id] + | |
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]); | |
- if (utils::file_exists(cur_part_filename)) { | |
- caller->m_files[request.m_file_id] = utils::file_open(cur_part_filename, "r"); | |
- request.m_buffer->read_from_file(caller->m_files[request.m_file_id]); | |
- } else request.m_buffer->m_filled = 0; | |
- } else { | |
- request.m_buffer->read_from_file(caller->m_files[request.m_file_id]); | |
- if (request.m_buffer->empty()) { | |
- // Close and delete current file. | |
- std::fclose(caller->m_files[request.m_file_id]); | |
- caller->m_files[request.m_file_id] = NULL; | |
- std::string cur_part_filename = caller->m_filenames[request.m_file_id] + | |
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]); | |
- utils::file_delete(cur_part_filename); | |
- | |
- // Attempt to read from the next file. | |
- ++caller->m_cur_part[request.m_file_id]; | |
- cur_part_filename = caller->m_filenames[request.m_file_id] + | |
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]); | |
- if (utils::file_exists(cur_part_filename)) { | |
- caller->m_files[request.m_file_id] = utils::file_open(cur_part_filename, "r"); | |
- request.m_buffer->read_from_file(caller->m_files[request.m_file_id]); | |
- } else request.m_buffer->m_filled = 0; | |
- } | |
- } | |
- caller->m_bytes_read += request.m_buffer->size_in_bytes(); | |
- | |
- // Update the status of the buffer | |
- // and notify the waiting thread. | |
- std::unique_lock<std::mutex> lk2(caller->m_mutexes[request.m_file_id]); | |
- request.m_buffer->m_is_filled = true; | |
- lk2.unlock(); | |
- caller->m_cvs[request.m_file_id].notify_one(); | |
- } | |
- } | |
- | |
- private: | |
- typedef buffer<value_type> buffer_type; | |
- typedef request<buffer_type> request_type; | |
- | |
- std::uint64_t m_bytes_read; | |
- std::uint64_t m_items_per_buf; | |
- std::uint64_t n_files; | |
- std::uint64_t m_files_added; | |
- | |
- std::FILE **m_files; | |
- std::string *m_filenames; | |
- std::uint64_t *m_cur_part; | |
- | |
- std::uint64_t *m_active_buffer_pos; | |
- buffer_type **m_active_buffers; | |
- buffer_type **m_passive_buffers; | |
- std::mutex *m_mutexes; | |
- std::condition_variable *m_cvs; | |
- | |
- request_queue<request_type> m_read_requests; | |
- std::thread *m_io_thread; | |
- | |
- private: | |
- void issue_read_request(std::uint64_t file_id) { | |
- request_type req(m_passive_buffers[file_id], file_id); | |
- m_read_requests.add(req); | |
- m_read_requests.m_cv.notify_one(); | |
- } | |
- | |
- void receive_new_buffer(std::uint64_t file_id) { | |
- // Wait for the I/O thread to finish reading passive buffer. | |
- std::unique_lock<std::mutex> lk(m_mutexes[file_id]); | |
- while (m_passive_buffers[file_id]->m_is_filled == false) | |
- m_cvs[file_id].wait(lk); | |
- | |
- // Swap active and bassive buffers. | |
- std::swap(m_active_buffers[file_id], m_passive_buffers[file_id]); | |
- m_active_buffer_pos[file_id] = 0; | |
- m_passive_buffers[file_id]->m_is_filled = false; | |
- lk.unlock(); | |
- | |
- // Issue the read request for the passive buffer. | |
- issue_read_request(file_id); | |
- } | |
- | |
- public: | |
- async_multipart_multifile_reader(std::uint64_t number_of_files, | |
- std::uint64_t buf_size_bytes = (1UL << 19)) { | |
- // Initialize basic parameters. | |
- n_files = number_of_files; | |
- m_files_added = 0; | |
- m_bytes_read = 0; | |
- m_items_per_buf = std::max(1UL, buf_size_bytes / sizeof(value_type)); | |
- | |
- m_mutexes = new std::mutex[n_files]; | |
- m_cvs = new std::condition_variable[n_files]; | |
- m_active_buffer_pos = new std::uint64_t[n_files]; | |
- m_files = new std::FILE*[n_files]; | |
- m_filenames = new std::string[n_files]; | |
- m_cur_part = new std::uint64_t[n_files]; | |
- m_active_buffers = new buffer_type*[n_files]; | |
- m_passive_buffers = new buffer_type*[n_files]; | |
- | |
- for (std::uint64_t i = 0; i < n_files; ++i) { | |
- m_active_buffer_pos[i] = 0; | |
- m_active_buffers[i] = new buffer_type(m_items_per_buf); | |
- m_passive_buffers[i] = new buffer_type(m_items_per_buf); | |
- } | |
- | |
- m_io_thread = new std::thread(async_io_thread_code<value_type>, this); | |
- } | |
- | |
- // The added file gets the next available ID (file IDs start from 0). | |
- void add_file(std::string filename) { | |
- m_filenames[m_files_added] = filename; | |
- m_files[m_files_added] = NULL; | |
- m_cur_part[m_files_added] = 0; | |
- issue_read_request(m_files_added); | |
- ++m_files_added; | |
- } | |
- | |
- // Read from i-th file. | |
- value_type read_from_ith_file(std::uint64_t i) { | |
- if (m_active_buffer_pos[i] == m_active_buffers[i]->m_filled) | |
- receive_new_buffer(i); | |
- return m_active_buffers[i]->m_content[m_active_buffer_pos[i]++]; | |
- } | |
- | |
- inline std::uint64_t bytes_read() const { | |
- return m_bytes_read; | |
- } | |
- | |
- ~async_multipart_multifile_reader() { | |
- // Let the I/O thread know that there | |
- // won't be any more requests. | |
- std::unique_lock<std::mutex> lk(m_read_requests.m_mutex); | |
- m_read_requests.m_no_more_requests = true; | |
- lk.unlock(); | |
- m_read_requests.m_cv.notify_one(); | |
- | |
- // Wait for the I/O to finish. | |
- m_io_thread->join(); | |
- delete m_io_thread; | |
- | |
- // Delete buffers. | |
- for (std::uint64_t i = 0; i < n_files; ++i) { | |
- delete m_active_buffers[i]; | |
- delete m_passive_buffers[i]; | |
- } | |
- | |
- // Rest of the cleanup. | |
- delete[] m_active_buffers; | |
- delete[] m_passive_buffers; | |
- delete[] m_mutexes; | |
- delete[] m_cvs; | |
- delete[] m_active_buffer_pos; | |
- delete[] m_files; | |
- delete[] m_filenames; | |
- delete[] m_cur_part; | |
- } | |
-}; | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_MULTIFILE_READER_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_reader.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_reader.hpp | |
deleted file mode 100644 | |
index 354a491d..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_reader.hpp | |
+++ /dev/null | |
@@ -1,355 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/io/async_stream_reader.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <queue> | |
-#include <string> | |
-#include <algorithm> | |
-#include <condition_variable> | |
-#include <mutex> | |
-#include <thread> | |
- | |
-#include "../utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename value_type> | |
-class async_stream_reader { | |
- private: | |
- template<typename T> | |
- struct buffer { | |
- buffer(std::uint64_t size) { | |
- m_size = size; | |
- m_content = (T *)malloc(m_size * sizeof(T)); | |
- m_filled = 0; | |
- } | |
- | |
- void read_from_file(std::FILE *f) { | |
- m_filled = std::fread(m_content, sizeof(T), m_size, f); | |
- } | |
- | |
- std::uint64_t size_in_bytes() const { | |
- return sizeof(T) * m_filled; | |
- } | |
- | |
- ~buffer() { | |
- free(m_content); | |
- } | |
- | |
- T *m_content; | |
- std::uint64_t m_size; | |
- std::uint64_t m_filled; | |
- }; | |
- | |
- template<typename buffer_type> | |
- struct buffer_queue { | |
- buffer_queue(std::uint64_t n_buffers = 0, std::uint64_t items_per_buf = 0) { | |
- m_signal_stop = false; | |
- for (std::uint64_t i = 0; i < n_buffers; ++i) | |
- m_queue.push(new buffer_type(items_per_buf)); | |
- } | |
- | |
- ~buffer_queue() { | |
- while (!m_queue.empty()) { | |
- buffer_type *buf = m_queue.front(); | |
- m_queue.pop(); | |
- delete buf; | |
- } | |
- } | |
- | |
- buffer_type *pop() { | |
- buffer_type *ret = m_queue.front(); | |
- m_queue.pop(); | |
- return ret; | |
- } | |
- | |
- void push(buffer_type *buf) { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_queue.push(buf); | |
- } | |
- | |
- void send_stop_signal() { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_signal_stop = true; | |
- } | |
- | |
- inline bool empty() const { return m_queue.empty(); } | |
- | |
- std::queue<buffer_type*> m_queue; // Must have FIFO property | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- bool m_signal_stop; | |
- }; | |
- | |
- private: | |
- typedef buffer<value_type> buffer_type; | |
- typedef buffer_queue<buffer_type> buffer_queue_type; | |
- | |
- buffer_queue_type *m_empty_buffers; | |
- buffer_queue_type *m_full_buffers; | |
- | |
- private: | |
- template<typename T> | |
- static void io_thread_code(async_stream_reader<T> *caller) { | |
- typedef buffer<T> buffer_type; | |
- while (true) { | |
- // Wait for an empty buffer (or a stop signal). | |
- std::unique_lock<std::mutex> lk(caller->m_empty_buffers->m_mutex); | |
- while (caller->m_empty_buffers->empty() && | |
- !(caller->m_empty_buffers->m_signal_stop)) | |
- caller->m_empty_buffers->m_cv.wait(lk); | |
- | |
- if (caller->m_empty_buffers->empty()) { | |
- // We received the stop signal -- exit. | |
- lk.unlock(); | |
- break; | |
- } | |
- | |
- // Extract the buffer from the queue. | |
- buffer_type *buffer = caller->m_empty_buffers->pop(); | |
- lk.unlock(); | |
- | |
- // Read the data from disk. | |
- buffer->read_from_file(caller->m_file); | |
- caller->m_bytes_read += buffer->size_in_bytes(); | |
- | |
- // Check if we reached the end of file. | |
- bool end_of_file = false; | |
- if (buffer->m_filled < buffer->m_size) | |
- end_of_file = true; | |
- | |
- if (buffer->m_filled > 0) { | |
- // Add the buffer to the queue of filled buffers. | |
- caller->m_full_buffers->push(buffer); | |
- caller->m_full_buffers->m_cv.notify_one(); | |
- } else { | |
- // Reinsert into the queue of empty buffers. | |
- caller->m_empty_buffers->push(buffer); | |
- } | |
- | |
- // If we reached the end of file -- exit. | |
- if (end_of_file == true) { | |
- caller->m_full_buffers->send_stop_signal(); | |
- caller->m_full_buffers->m_cv.notify_one(); | |
- break; | |
- } | |
- } | |
- } | |
- | |
- public: | |
- void receive_new_buffer() { | |
- // Push the current buffer back to the poll of empty buffers. | |
- if (m_cur_buffer != NULL) { | |
- m_empty_buffers->push(m_cur_buffer); | |
- m_empty_buffers->m_cv.notify_one(); | |
- m_cur_buffer = NULL; | |
- } | |
- | |
- // Extract a filled buffer. | |
- std::unique_lock<std::mutex> lk(m_full_buffers->m_mutex); | |
- while (m_full_buffers->empty() && !(m_full_buffers->m_signal_stop)) | |
- m_full_buffers->m_cv.wait(lk); | |
- m_cur_buffer_pos = 0; | |
- if (m_full_buffers->empty()) { | |
- lk.unlock(); | |
- m_cur_buffer_filled = 0; | |
- } else { | |
- m_cur_buffer = m_full_buffers->pop(); | |
- lk.unlock(); | |
- m_cur_buffer_filled = m_cur_buffer->m_filled; | |
- } | |
- } | |
- | |
- private: | |
- std::FILE *m_file; | |
- std::uint64_t m_bytes_read; | |
- std::uint64_t m_cur_buffer_pos; | |
- std::uint64_t m_cur_buffer_filled; | |
- buffer_type *m_cur_buffer; | |
- std::thread *m_io_thread; | |
- | |
- public: | |
- // Default constructor, reads from stdin. | |
- async_stream_reader() { | |
- init("", (8UL << 20), 4UL, 0UL); | |
- } | |
- | |
- // Constructor, default buffer sizes, no skip. | |
- async_stream_reader(std::string filename) { | |
- init(filename, (8UL << 20), 4UL, 0UL); | |
- } | |
- | |
- // Constructor, default buffer sizes, given skip. | |
- async_stream_reader(std::string filename, | |
- std::uint64_t n_skip_bytes) { | |
- init(filename, (8UL << 20), 4UL, n_skip_bytes); | |
- } | |
- | |
- // Constructor, no skip, given buffer sizes. | |
- async_stream_reader(std::string filename, | |
- std::uint64_t total_buf_size_bytes, | |
- std::uint64_t n_buffers) { | |
- init(filename, total_buf_size_bytes, n_buffers, 0UL); | |
- } | |
- | |
- // Constructor, given buffer sizes and skip. | |
- async_stream_reader(std::string filename, | |
- std::uint64_t total_buf_size_bytes, | |
- std::uint64_t n_buffers, | |
- std::uint64_t n_skip_bytes) { | |
- init(filename, total_buf_size_bytes, n_buffers, n_skip_bytes); | |
- } | |
- | |
- // Main initializing function. | |
- void init(std::string filename, | |
- std::uint64_t total_buf_size_bytes, | |
- std::uint64_t n_buffers, | |
- std::uint64_t n_skip_bytes) { | |
- if (filename.empty()) m_file = stdin; | |
- else m_file = utils::file_open_nobuf(filename.c_str(), "r"); | |
- | |
- if (m_file != stdin && n_skip_bytes > 0) | |
- std::fseek(m_file, n_skip_bytes, SEEK_SET); | |
- | |
- // Initialize counters. | |
- m_bytes_read = 0; | |
- m_cur_buffer_pos = 0; | |
- m_cur_buffer_filled = 0; | |
- m_cur_buffer = NULL; | |
- | |
- // Allocate buffers. | |
- std::uint64_t total_buf_size_items = total_buf_size_bytes / sizeof(value_type); | |
- std::uint64_t items_per_buf = std::max(1UL, total_buf_size_items / n_buffers); | |
- m_empty_buffers = new buffer_queue_type(n_buffers, items_per_buf); | |
- m_full_buffers = new buffer_queue_type(); | |
- | |
- // Start the I/O thread. | |
- m_io_thread = new std::thread(io_thread_code<value_type>, this); | |
- } | |
- | |
- // Return the next item in the stream. | |
- inline value_type read() { | |
- if (m_cur_buffer_pos == m_cur_buffer_filled) | |
- receive_new_buffer(); | |
- | |
- return m_cur_buffer->m_content[m_cur_buffer_pos++]; | |
- } | |
- | |
- // Read 'howmany' items into 'dest'. | |
- void read(value_type *dest, std::uint64_t howmany) { | |
- while (howmany > 0) { | |
- if (m_cur_buffer_pos == m_cur_buffer_filled) | |
- receive_new_buffer(); | |
- | |
- std::uint64_t cur_buf_left = m_cur_buffer_filled - m_cur_buffer_pos; | |
- std::uint64_t tocopy = std::min(howmany, cur_buf_left); | |
- for (std::uint64_t i = 0; i < tocopy; ++i) | |
- dest[i] = m_cur_buffer->m_content[m_cur_buffer_pos + i]; | |
- m_cur_buffer_pos += tocopy; | |
- dest += tocopy; | |
- howmany -= tocopy; | |
- } | |
- } | |
- | |
- // Skip the next 'howmany' items in the stream. | |
- void skip(std::uint64_t howmany) { | |
- while (howmany > 0) { | |
- if (m_cur_buffer_pos == m_cur_buffer_filled) | |
- receive_new_buffer(); | |
- | |
- std::uint64_t toskip = std::min(howmany, m_cur_buffer_filled - m_cur_buffer_pos); | |
- m_cur_buffer_pos += toskip; | |
- howmany -= toskip; | |
- } | |
- } | |
- | |
- // Return the next item in the stream. | |
- inline value_type peek() { | |
- if (m_cur_buffer_pos == m_cur_buffer_filled) | |
- receive_new_buffer(); | |
- | |
- return m_cur_buffer->m_content[m_cur_buffer_pos]; | |
- } | |
- | |
- // True iff there are no more items in the stream. | |
- inline bool empty() { | |
- if (m_cur_buffer_pos == m_cur_buffer_filled) | |
- receive_new_buffer(); | |
- | |
- return (m_cur_buffer_pos == m_cur_buffer_filled); | |
- } | |
- | |
- // Return const ptr to internal buffer. | |
- const value_type *get_buf_ptr() const { | |
- return m_cur_buffer->m_content; | |
- } | |
- | |
- // Return the number of items in the internal buffer. | |
- std::uint64_t get_buf_filled() const { | |
- return m_cur_buffer_filled; | |
- } | |
- | |
- // Performed I/O in bytes. | |
- inline std::uint64_t bytes_read() const { | |
- return m_bytes_read; | |
- } | |
- | |
- // Destructor. | |
- ~async_stream_reader() { | |
- // Let the I/O thread know that we're done. | |
- m_empty_buffers->send_stop_signal(); | |
- m_empty_buffers->m_cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- m_io_thread->join(); | |
- | |
- // Clean up. | |
- delete m_empty_buffers; | |
- delete m_full_buffers; | |
- delete m_io_thread; | |
- if (m_file != stdin) | |
- std::fclose(m_file); | |
- | |
- if (m_cur_buffer != NULL) | |
- delete m_cur_buffer; | |
- } | |
-}; | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_writer.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_writer.hpp | |
deleted file mode 100644 | |
index da5b853f..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_writer.hpp | |
+++ /dev/null | |
@@ -1,264 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/io/async_stream_writer.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <queue> | |
-#include <string> | |
-#include <algorithm> | |
-#include <condition_variable> | |
-#include <mutex> | |
-#include <thread> | |
- | |
-#include "../utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-template<typename value_type> | |
-class async_stream_writer { | |
- private: | |
- template<typename T> | |
- struct buffer { | |
- buffer(std::uint64_t size) { | |
- m_size = size; | |
- m_content = (T *)malloc(m_size * sizeof(T)); | |
- m_filled = 0; | |
- } | |
- | |
- void write_to_file(std::FILE *f) { | |
- utils::write_to_file(m_content, m_filled, f); | |
- m_filled = 0; | |
- } | |
- | |
- ~buffer() { | |
- free(m_content); | |
- } | |
- | |
- inline bool empty() const { return m_filled == 0; } | |
- inline bool full() const { return m_filled == m_size; } | |
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; } | |
- inline std::uint64_t free_space() const { return m_size - m_filled; } | |
- | |
- T *m_content; | |
- std::uint64_t m_size; | |
- std::uint64_t m_filled; | |
- }; | |
- | |
- template<typename buffer_type> | |
- struct buffer_queue { | |
- buffer_queue(std::uint64_t n_buffers = 0, std::uint64_t items_per_buf = 0) { | |
- m_signal_stop = false; | |
- for (std::uint64_t i = 0; i < n_buffers; ++i) | |
- m_queue.push(new buffer_type(items_per_buf)); | |
- } | |
- | |
- ~buffer_queue() { | |
- while (!m_queue.empty()) { | |
- buffer_type *buf = m_queue.front(); | |
- m_queue.pop(); | |
- delete buf; | |
- } | |
- } | |
- | |
- buffer_type *pop() { | |
- buffer_type *ret = m_queue.front(); | |
- m_queue.pop(); | |
- return ret; | |
- } | |
- | |
- void push(buffer_type *buf) { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_queue.push(buf); | |
- } | |
- | |
- void send_stop_signal() { | |
- std::lock_guard<std::mutex> lk(m_mutex); | |
- m_signal_stop = true; | |
- } | |
- | |
- inline bool empty() const { return m_queue.empty(); } | |
- | |
- std::queue<buffer_type*> m_queue; // Must have FIFO property | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- bool m_signal_stop; | |
- }; | |
- | |
- private: | |
- typedef buffer<value_type> buffer_type; | |
- typedef buffer_queue<buffer_type> buffer_queue_type; | |
- | |
- buffer_queue_type *m_empty_buffers; | |
- buffer_queue_type *m_full_buffers; | |
- | |
- private: | |
- template<typename T> | |
- static void io_thread_code(async_stream_writer<T> *caller) { | |
- typedef buffer<T> buffer_type; | |
- while (true) { | |
- // Wait for the full buffer (or a stop signal). | |
- std::unique_lock<std::mutex> lk(caller->m_full_buffers->m_mutex); | |
- while (caller->m_full_buffers->empty() && | |
- !(caller->m_full_buffers->m_signal_stop)) | |
- caller->m_full_buffers->m_cv.wait(lk); | |
- | |
- if (caller->m_full_buffers->empty()) { | |
- // We received the stop signal -- exit. | |
- lk.unlock(); | |
- break; | |
- } | |
- | |
- // Extract the buffer from the collection. | |
- buffer_type *buffer = caller->m_full_buffers->pop(); | |
- lk.unlock(); | |
- | |
- // Write the data to disk. | |
- buffer->write_to_file(caller->m_file); | |
- | |
- // Add the (now empty) buffer to the collection | |
- // of empty buffers and notify the waiting thread. | |
- caller->m_empty_buffers->push(buffer); | |
- caller->m_empty_buffers->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- // Get a free buffer from the collection of free buffers. | |
- buffer_type* get_empty_buffer() { | |
- std::unique_lock<std::mutex> lk(m_empty_buffers->m_mutex); | |
- while (m_empty_buffers->empty()) | |
- m_empty_buffers->m_cv.wait(lk); | |
- buffer_type *ret = m_empty_buffers->pop(); | |
- lk.unlock(); | |
- return ret; | |
- } | |
- | |
- private: | |
- std::FILE *m_file; | |
- | |
- std::uint64_t m_bytes_written; | |
- std::uint64_t m_items_per_buf; | |
- | |
- buffer_type *m_cur_buffer; | |
- std::thread *m_io_thread; | |
- | |
- public: | |
- async_stream_writer(std::string filename = std::string(""), | |
- std::uint64_t total_buf_size_bytes = (8UL << 20), | |
- std::uint64_t n_buffers = 4UL, | |
- std::string write_mode = std::string("w")) { | |
- if (filename.empty()) m_file = stdout; | |
- else m_file = utils::file_open_nobuf(filename.c_str(), write_mode); | |
- | |
- // Allocate buffers. | |
- std::uint64_t total_buf_size_items = total_buf_size_bytes / sizeof(value_type); | |
- m_items_per_buf = std::max(1UL, total_buf_size_items / n_buffers); | |
- m_empty_buffers = new buffer_queue_type(n_buffers, m_items_per_buf); | |
- m_full_buffers = new buffer_queue_type(); | |
- | |
- // Initialize empty buffer. | |
- m_cur_buffer = get_empty_buffer(); | |
- m_bytes_written = 0; | |
- | |
- // Start the I/O thread. | |
- m_io_thread = new std::thread(io_thread_code<value_type>, this); | |
- } | |
- | |
- // Write item x to the stream. | |
- inline void write(value_type x) { | |
- m_bytes_written += sizeof(value_type); | |
- m_cur_buffer->m_content[m_cur_buffer->m_filled++] = x; | |
- if (m_cur_buffer->full()) { | |
- m_full_buffers->push(m_cur_buffer); | |
- m_full_buffers->m_cv.notify_one(); | |
- m_cur_buffer = get_empty_buffer(); | |
- } | |
- } | |
- | |
- // Write values[0..length) to the stream. | |
- inline void write(const value_type *values, std::uint64_t length) { | |
- m_bytes_written += length * sizeof(value_type); | |
- while (length > 0) { | |
- std::uint64_t tocopy = std::min(length, m_cur_buffer->free_space()); | |
- std::copy(values, values + tocopy, m_cur_buffer->m_content + m_cur_buffer->m_filled); | |
- m_cur_buffer->m_filled += tocopy; | |
- values += tocopy; | |
- length -= tocopy; | |
- if (m_cur_buffer->full()) { | |
- m_full_buffers->push(m_cur_buffer); | |
- m_full_buffers->m_cv.notify_one(); | |
- m_cur_buffer = get_empty_buffer(); | |
- } | |
- } | |
- } | |
- | |
- // Return performed I/O in bytes. | |
- inline std::uint64_t bytes_written() const { | |
- return m_bytes_written; | |
- } | |
- | |
- // Destructor. | |
- ~async_stream_writer() { | |
- // Send the last incomplete buffer for writing. | |
- if (!(m_cur_buffer->empty())) { | |
- m_full_buffers->push(m_cur_buffer); | |
- m_full_buffers->m_cv.notify_one(); | |
- m_cur_buffer = NULL; | |
- } | |
- | |
- // Let the I/O thread know that we're done. | |
- m_full_buffers->send_stop_signal(); | |
- m_full_buffers->m_cv.notify_one(); | |
- | |
- // Wait for the I/O thread to finish. | |
- m_io_thread->join(); | |
- | |
- // Delete buffers and close the file. | |
- delete m_empty_buffers; | |
- delete m_full_buffers; | |
- delete m_io_thread; | |
- | |
- if (m_file != stdout) | |
- std::fclose(m_file); | |
- | |
- if (m_cur_buffer != NULL) | |
- delete m_cur_buffer; | |
- } | |
-}; | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/process_halfsegment_pairs.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/process_halfsegment_pairs.hpp | |
deleted file mode 100644 | |
index c6a17fa5..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/process_halfsegment_pairs.hpp | |
+++ /dev/null | |
@@ -1,567 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/process_halfsegment_pairs.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_PROCESS_HALFSEGMENT_PAIRS_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_PROCESS_HALFSEGMENT_PAIRS_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdint> | |
-#include <string> | |
-#include <vector> | |
-#include <algorithm> | |
-#include <omp.h> | |
- | |
-#include "io/async_stream_reader.hpp" | |
-#include "io/async_stream_writer.hpp" | |
-#include "io/async_multi_stream_writer.hpp" | |
-#include "utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-std::uint64_t naive_lcp(std::uint64_t i, std::uint64_t j, std::uint64_t lcp, | |
- std::FILE *f_text, std::uint64_t text_length, std::uint64_t &io_volume) { | |
- static const std::uint64_t bufsize = (1L << 20); | |
- std::uint8_t *b1 = new std::uint8_t[bufsize]; | |
- std::uint8_t *b2 = new std::uint8_t[bufsize]; | |
- std::uint64_t io_vol = 0; | |
- while (true) { | |
- std::uint64_t toread = std::min(bufsize, text_length - std::max(i, j) - lcp); | |
- if (!toread) break; | |
- utils::read_at_offset(b1, i + lcp, toread, f_text); | |
- utils::read_at_offset(b2, j + lcp, toread, f_text); | |
- io_vol += 2UL * toread; | |
- std::uint64_t lcp_delta = 0; | |
- while (lcp_delta < toread && b1[lcp_delta] == b2[lcp_delta]) | |
- ++lcp_delta; | |
- lcp += lcp_delta; | |
- if (lcp_delta < toread) | |
- break; | |
- } | |
- delete[] b1; | |
- delete[] b2; | |
- io_volume += io_vol; | |
- return lcp; | |
-} | |
- | |
-struct buf_item_ext { | |
- std::uint64_t m_left_idx; | |
- std::uint64_t m_right_idx; | |
- std::uint64_t m_ans; | |
- std::uint64_t m_block_id; | |
-}; | |
- | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-std::uint64_t process_halfsegment_pairs(std::string text_filename, | |
- std::uint64_t text_length, std::uint64_t max_block_size_B, | |
- std::uint64_t max_halfsegment_size, std::uint64_t max_overflow_size, | |
- std::string **pairs_filenames, std::string *irreducible_bits_filenames, | |
- std::uint64_t &total_io_volume) { | |
- fprintf(stderr, " Compute irreducible LCP values:\n"); | |
- long double start = utils::wclock(); | |
- | |
- // Initialize basic parameters. | |
- std::uint64_t n_blocks_B = (2UL * text_length + max_block_size_B - 1) / max_block_size_B; | |
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size; | |
- std::uint64_t sum_irreducible_lcps = 0; | |
- | |
- // Open file with text. | |
- std::FILE *f_text = utils::file_open(text_filename, "r"); | |
- | |
- // Initialize multiwriter of values 2i + PLCP[i]. | |
- typedef async_multi_stream_writer<ext_text_offset_type> lcp_multiwriter_type; | |
- lcp_multiwriter_type *lcp_multiwriter = NULL; | |
- { | |
- static const std::uint64_t n_free_buffers = 4; | |
- std::uint64_t buffer_size = (1UL << 20); | |
- lcp_multiwriter = new lcp_multiwriter_type(buffer_size, n_free_buffers); | |
- for (std::uint64_t block_id = 0; block_id < n_blocks_B; ++block_id) | |
- lcp_multiwriter->add_file(irreducible_bits_filenames[block_id]); | |
- } | |
- | |
- // Allocate halfsegments. | |
- std::uint8_t *left_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size); | |
- std::uint8_t *right_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size); | |
- | |
- // Allocate buffers. | |
- static const std::uint64_t local_buf_size = (1UL << 20); | |
- text_offset_type *idx_buf = new text_offset_type[local_buf_size * 2]; | |
-#ifdef _OPENMP | |
- buf_item_ext *ans_buf = new buf_item_ext[local_buf_size]; | |
-#endif | |
- | |
- // Processing of halfsegment pairs follows. | |
- for (std::uint64_t left_halfsegment_id = 0; left_halfsegment_id < n_halfsegments; ++left_halfsegment_id) { | |
- std::uint64_t left_halfsegment_beg = left_halfsegment_id * max_halfsegment_size; | |
- std::uint64_t left_halfsegment_end = std::min(left_halfsegment_beg + max_halfsegment_size, text_length); | |
- std::uint64_t left_halfsegment_ext_end = std::min(left_halfsegment_end + max_overflow_size, text_length); | |
- std::uint64_t left_halfsegment_ext_size = left_halfsegment_ext_end - left_halfsegment_beg; | |
- bool left_halfsegment_loaded = false; | |
- | |
- // Scan all halfsegments to the right of left_halfsegment_id. | |
- for (std::uint64_t right_halfsegment_id = left_halfsegment_id; right_halfsegment_id < n_halfsegments; right_halfsegment_id++) { | |
- std::uint64_t right_halfsegment_beg = right_halfsegment_id * max_halfsegment_size; | |
- std::uint64_t right_halfsegment_end = std::min(right_halfsegment_beg + max_halfsegment_size, text_length); | |
- std::uint64_t right_halfsegment_ext_end = std::min(right_halfsegment_end + max_overflow_size, text_length); | |
- std::uint64_t right_halfsegment_ext_size = right_halfsegment_ext_end - right_halfsegment_beg; | |
- | |
- // Check if that pair of halfsegments has any associated pairs. | |
- std::string pairs_filename = pairs_filenames[left_halfsegment_id][right_halfsegment_id]; | |
- if (utils::file_exists(pairs_filename) == false || utils::file_size(pairs_filename) == 0) { | |
- if (utils::file_exists(pairs_filename)) | |
- utils::file_delete(pairs_filename); | |
- continue; | |
- } | |
- | |
- // Print initial progress message. | |
- fprintf(stderr, " Process halfsegments %lu and %lu: ", left_halfsegment_id, right_halfsegment_id); | |
- long double halfsegment_process_start = utils::wclock(); | |
- std::uint64_t local_lcp_sum = 0; | |
- std::uint64_t extra_io = 0; | |
- std::uint64_t io_vol = 0; | |
- | |
- // Initialize reading from file associated with current pair of halfsegments. | |
- typedef async_stream_reader<text_offset_type> pair_reader_type; | |
- std::uint64_t n_pairs = utils::file_size(pairs_filename) / (2 * sizeof(text_offset_type)); | |
- pair_reader_type *pair_reader = new pair_reader_type(pairs_filename); | |
- | |
- // Read left halfsegment from disk (if it wasn't already) | |
- if (left_halfsegment_loaded == false) { | |
- utils::read_at_offset(left_halfsegment, left_halfsegment_beg, left_halfsegment_ext_size, text_filename); | |
- left_halfsegment_loaded = true; | |
- extra_io += left_halfsegment_ext_size; | |
- } | |
- | |
- // Read right halfsegment from disk. | |
- std::uint8_t *right_halfsegment_ptr = right_halfsegment; | |
- if (right_halfsegment_id != left_halfsegment_id) { | |
- utils::read_at_offset(right_halfsegment, right_halfsegment_beg, right_halfsegment_ext_size, text_filename); | |
- extra_io += right_halfsegment_ext_size; | |
- } else right_halfsegment_ptr = left_halfsegment; | |
- | |
- std::uint64_t pairs_processed = 0; | |
- while (pairs_processed < n_pairs) { | |
- std::uint64_t filled = std::min(n_pairs - pairs_processed, local_buf_size); | |
- pair_reader->read(idx_buf, filled * 2); | |
- | |
-#ifdef _OPENMP | |
- std::vector<std::uint64_t> long_lcps; | |
- std::uint64_t max_threads = omp_get_max_threads(); | |
- std::uint64_t max_block_size = (filled + max_threads - 1) / max_threads; | |
- std::uint64_t n_threads = (filled + max_block_size - 1) / max_block_size; | |
- #pragma omp parallel num_threads(n_threads) | |
- { | |
- std::uint64_t thread_id = omp_get_thread_num(); | |
- std::uint64_t block_beg = thread_id * max_block_size; | |
- std::uint64_t block_end = std::min(block_beg + max_block_size, filled); | |
- std::vector<std::uint64_t> local_long_lcps; | |
- std::uint64_t thread_lcp_sum = 0; | |
- | |
- for (std::uint64_t j = block_beg; j < block_end; ++j) { | |
- std::uint64_t i = idx_buf[2 * j]; | |
- std::uint64_t phi_i = idx_buf[2 * j + 1]; | |
- std::uint64_t left_idx = i; | |
- std::uint64_t right_idx = phi_i; | |
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end && | |
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end)) | |
- std::swap(left_idx, right_idx); | |
- | |
- // Compute LCP value. | |
- std::uint64_t lcp = 0; | |
- while (left_idx + lcp < left_halfsegment_ext_end && | |
- right_idx + lcp < right_halfsegment_ext_end && | |
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] == | |
- right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp]) | |
- ++lcp; | |
- | |
- // If the LCP computation cannot be completed, add it to the list of unfinished LCPs. | |
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) || | |
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length)) { | |
- ans_buf[j].m_left_idx = left_idx; | |
- ans_buf[j].m_right_idx = right_idx; | |
- ans_buf[j].m_ans = lcp; | |
- local_long_lcps.push_back(j); | |
- } else { | |
- std::uint64_t pos_in_B = 2UL * i + lcp; | |
- std::uint64_t block_id = pos_in_B / max_block_size_B; | |
- ans_buf[j].m_ans = pos_in_B; | |
- ans_buf[j].m_block_id = block_id; | |
- thread_lcp_sum += lcp; | |
- } | |
- } | |
- | |
- #pragma omp critical | |
- { | |
- // Concatenate the list of long LCP processed by a given thread with a global list. | |
- long_lcps.insert(long_lcps.end(), local_long_lcps.begin(), local_long_lcps.end()); | |
- local_lcp_sum += thread_lcp_sum; | |
- } | |
- } | |
- | |
- // Finish the computatino of long LCPs using naive method. | |
- for (std::uint64_t j = 0; j < long_lcps.size(); ++j) { | |
- std::uint64_t which = long_lcps[j]; | |
- | |
- // Retreive indexes from the buffer. | |
- std::uint64_t i = idx_buf[2 * which]; | |
- std::uint64_t left_idx = ans_buf[which].m_left_idx; | |
- std::uint64_t right_idx = ans_buf[which].m_right_idx; | |
- std::uint64_t lcp = ans_buf[which].m_ans; | |
- | |
- // Compute LCP. | |
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol); | |
- | |
- // Compute answer. | |
- std::uint64_t pos_in_B = 2UL * i + lcp; | |
- std::uint64_t block_id = pos_in_B / max_block_size_B; | |
- | |
- // Write answer to buffer. | |
- ans_buf[which].m_ans = pos_in_B; | |
- ans_buf[which].m_block_id = block_id; | |
- | |
- // Update stats. | |
- local_lcp_sum += lcp; | |
- } | |
- | |
- // Write LCPs to file. | |
- for (std::uint64_t j = 0; j < filled; ++j) | |
- lcp_multiwriter->write_to_ith_file(ans_buf[j].m_block_id, ans_buf[j].m_ans); | |
- | |
-#else | |
- for (std::uint64_t j = 0; j < filled; ++j) { | |
- std::uint64_t i = idx_buf[2 * j]; | |
- std::uint64_t phi_i = idx_buf[2 * j + 1]; | |
- std::uint64_t left_idx = i; | |
- std::uint64_t right_idx = phi_i; | |
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end && | |
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end)) | |
- std::swap(left_idx, right_idx); | |
- | |
- // Compute LCP value. | |
- std::uint64_t lcp = 0; | |
- while (left_idx + lcp < left_halfsegment_ext_end && right_idx + lcp < right_halfsegment_ext_end && | |
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] == right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp]) | |
- ++lcp; | |
- | |
- // Finish the long LCP using naive method. | |
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) || | |
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length)) | |
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol); | |
- | |
- // Write LCP to file. | |
- std::uint64_t pos_in_B = 2 * i + lcp; | |
- std::uint64_t block_id = pos_in_B / max_block_size_B; | |
- lcp_multiwriter->write_to_ith_file(block_id, pos_in_B); | |
- local_lcp_sum += lcp; | |
- } | |
-#endif | |
- | |
- pairs_processed += filled; | |
- } | |
- | |
- // Update I/O volume. | |
- io_vol += pair_reader->bytes_read() + extra_io + n_pairs * sizeof(ext_text_offset_type); | |
- total_io_volume += io_vol; | |
- | |
- // Clean up. | |
- delete pair_reader; | |
- utils::file_delete(pairs_filename); | |
- | |
- // Update statistics. | |
- sum_irreducible_lcps += local_lcp_sum; | |
- | |
- // Print summary. | |
- long double avg_lcp = (long double)local_lcp_sum / (long double)std::max(1UL, n_pairs); | |
- long double elapsed = utils::wclock() - halfsegment_process_start; | |
- fprintf(stderr, "time = %.1Lfs, I/O = %.1LfMiB/s, avg_lcp = %.2Lf, total I/O vol = %.2Lfn\n", elapsed, | |
- (1.L * io_vol / (1L << 20)) / elapsed, avg_lcp, (1.L * total_io_volume) / text_length); | |
- } | |
- } | |
- | |
- // Clean up. | |
- delete[] idx_buf; | |
-#ifdef _OPENMP | |
- delete[] ans_buf; | |
-#endif | |
- delete lcp_multiwriter; | |
- std::fclose(f_text); | |
- free(left_halfsegment); | |
- free(right_halfsegment); | |
- | |
- // Print summary. | |
- long double total_time = utils::wclock() - start; | |
- fprintf(stderr, " Total time: %.2Lfs, total I/O vol = %.2Lfn\n", | |
- total_time, (1.L * total_io_volume) / text_length); | |
- | |
- return sum_irreducible_lcps; | |
-} | |
- | |
-struct buf_item { | |
- std::uint64_t m_left_idx; | |
- std::uint64_t m_right_idx; | |
- std::uint64_t m_ans; | |
-}; | |
- | |
-template<typename text_offset_type, typename ext_text_offset_type> | |
-std::uint64_t process_halfsegment_pairs(std::string text_filename, | |
- std::uint64_t text_length, std::uint64_t max_halfsegment_size, | |
- std::uint64_t max_overflow_size, std::string **pairs_filenames, | |
- std::string output_filename, std::uint64_t &total_io_volume) { | |
- fprintf(stderr, " Compute irreducible LCP values:\n"); | |
- long double start = utils::wclock(); | |
- | |
- // Initialize basic parameters. | |
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size; | |
- std::uint64_t sum_irreducible_lcps = 0; | |
- | |
- // Open file with text. | |
- std::FILE *f_text = utils::file_open(text_filename, "r"); | |
- | |
- // Initialize writer of values 2i + PLCP[i]. | |
- typedef async_stream_writer<ext_text_offset_type> lcp_writer_type; | |
- lcp_writer_type *lcp_writer = new lcp_writer_type(output_filename); | |
- | |
- // Allocate halfsegments. | |
- std::uint8_t *left_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size); | |
- std::uint8_t *right_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size); | |
- | |
- // Allocate buffers. | |
- static const std::uint64_t local_buf_size = (1UL << 20); | |
- text_offset_type *idx_buf = new text_offset_type[local_buf_size * 2]; | |
-#ifdef _OPENMP | |
- buf_item *ans_buf = new buf_item[local_buf_size]; | |
-#endif | |
- | |
- // Processing of halfsegment pairs follows. | |
- for (std::uint64_t left_halfsegment_id = 0; left_halfsegment_id < n_halfsegments; ++left_halfsegment_id) { | |
- std::uint64_t left_halfsegment_beg = left_halfsegment_id * max_halfsegment_size; | |
- std::uint64_t left_halfsegment_end = std::min(left_halfsegment_beg + max_halfsegment_size, text_length); | |
- std::uint64_t left_halfsegment_ext_end = std::min(left_halfsegment_end + max_overflow_size, text_length); | |
- std::uint64_t left_halfsegment_ext_size = left_halfsegment_ext_end - left_halfsegment_beg; | |
- bool left_halfsegment_loaded = false; | |
- | |
- // Scan all halfsegments to the right of left_halfsegment_id. | |
- for (std::uint64_t right_halfsegment_id = left_halfsegment_id; right_halfsegment_id < n_halfsegments; right_halfsegment_id++) { | |
- std::uint64_t right_halfsegment_beg = right_halfsegment_id * max_halfsegment_size; | |
- std::uint64_t right_halfsegment_end = std::min(right_halfsegment_beg + max_halfsegment_size, text_length); | |
- std::uint64_t right_halfsegment_ext_end = std::min(right_halfsegment_end + max_overflow_size, text_length); | |
- std::uint64_t right_halfsegment_ext_size = right_halfsegment_ext_end - right_halfsegment_beg; | |
- | |
- // Check if that pair of halfsegments has any associated pairs. | |
- std::string pairs_filename = pairs_filenames[left_halfsegment_id][right_halfsegment_id]; | |
- if (utils::file_exists(pairs_filename) == false || utils::file_size(pairs_filename) == 0) { | |
- if (utils::file_exists(pairs_filename)) | |
- utils::file_delete(pairs_filename); | |
- continue; | |
- } | |
- | |
- // Print initial progress message. | |
- fprintf(stderr, " Process halfsegments %lu and %lu: ", left_halfsegment_id, right_halfsegment_id); | |
- long double halfsegment_process_start = utils::wclock(); | |
- std::uint64_t local_lcp_sum = 0; | |
- std::uint64_t extra_io = 0; | |
- std::uint64_t io_vol = 0; | |
- | |
- // Initialize reading from file associated with current pair of halfsegments. | |
- typedef async_stream_reader<text_offset_type> pair_reader_type; | |
- std::uint64_t n_pairs = utils::file_size(pairs_filename) / (2 * sizeof(text_offset_type)); | |
- pair_reader_type *pair_reader = new pair_reader_type(pairs_filename); | |
- | |
- // Read left halfsegment from disk (if it wasn't already) | |
- if (left_halfsegment_loaded == false) { | |
- utils::read_at_offset(left_halfsegment, left_halfsegment_beg, left_halfsegment_ext_size, text_filename); | |
- left_halfsegment_loaded = true; | |
- extra_io += left_halfsegment_ext_size; | |
- } | |
- | |
- // Read right halfsegment from disk. | |
- std::uint8_t *right_halfsegment_ptr = right_halfsegment; | |
- if (right_halfsegment_id != left_halfsegment_id) { | |
- utils::read_at_offset(right_halfsegment, right_halfsegment_beg, right_halfsegment_ext_size, text_filename); | |
- extra_io += right_halfsegment_ext_size; | |
- } else right_halfsegment_ptr = left_halfsegment; | |
- | |
- std::uint64_t pairs_processed = 0; | |
- while (pairs_processed < n_pairs) { | |
- std::uint64_t filled = std::min(n_pairs - pairs_processed, local_buf_size); | |
- pair_reader->read(idx_buf, filled * 2); | |
- | |
-#ifdef _OPENMP | |
- std::vector<std::uint64_t> long_lcps; | |
- std::uint64_t max_threads = omp_get_max_threads(); | |
- std::uint64_t max_block_size = (filled + max_threads - 1) / max_threads; | |
- std::uint64_t n_threads = (filled + max_block_size - 1) / max_block_size; | |
- #pragma omp parallel num_threads(n_threads) | |
- { | |
- std::uint64_t thread_id = omp_get_thread_num(); | |
- std::uint64_t block_beg = thread_id * max_block_size; | |
- std::uint64_t block_end = std::min(block_beg + max_block_size, filled); | |
- std::vector<std::uint64_t> local_long_lcps; | |
- std::uint64_t thread_lcp_sum = 0; | |
- | |
- for (std::uint64_t j = block_beg; j < block_end; ++j) { | |
- std::uint64_t i = idx_buf[2 * j]; | |
- std::uint64_t phi_i = idx_buf[2 * j + 1]; | |
- std::uint64_t left_idx = i; | |
- std::uint64_t right_idx = phi_i; | |
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end && | |
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end)) | |
- std::swap(left_idx, right_idx); | |
- | |
- // Compute LCP value. | |
- std::uint64_t lcp = 0; | |
- while (left_idx + lcp < left_halfsegment_ext_end && | |
- right_idx + lcp < right_halfsegment_ext_end && | |
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] == | |
- right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp]) | |
- ++lcp; | |
- | |
- // If the LCP computation cannot be completed, add it to the list of unfinished LCPs. | |
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) || | |
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length)) { | |
- ans_buf[j].m_left_idx = left_idx; | |
- ans_buf[j].m_right_idx = right_idx; | |
- ans_buf[j].m_ans = lcp; | |
- local_long_lcps.push_back(j); | |
- } else { | |
- std::uint64_t pos_in_B = 2UL * i + lcp; | |
- ans_buf[j].m_ans = pos_in_B; | |
- thread_lcp_sum += lcp; | |
- } | |
- } | |
- | |
- #pragma omp critical | |
- { | |
- // Concatenate the list of long LCP processed by a given thread with a global list. | |
- long_lcps.insert(long_lcps.end(), local_long_lcps.begin(), local_long_lcps.end()); | |
- local_lcp_sum += thread_lcp_sum; | |
- } | |
- } | |
- | |
- // Finish the computation of long LCPs using naive method. | |
- for (std::uint64_t j = 0; j < long_lcps.size(); ++j) { | |
- std::uint64_t which = long_lcps[j]; | |
- | |
- // Retreive indexes from the buffer. | |
- std::uint64_t i = idx_buf[2 * which]; | |
- std::uint64_t left_idx = ans_buf[which].m_left_idx; | |
- std::uint64_t right_idx = ans_buf[which].m_right_idx; | |
- std::uint64_t lcp = ans_buf[which].m_ans; | |
- | |
- // Compute LCP. | |
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol); | |
- | |
- // Compute answer. | |
- std::uint64_t pos_in_B = 2UL * i + lcp; | |
- | |
- // Write answer to buffer. | |
- ans_buf[which].m_ans = pos_in_B; | |
- | |
- // Update stats. | |
- local_lcp_sum += lcp; | |
- } | |
- | |
- // Write LCPs to file. | |
- for (std::uint64_t j = 0; j < filled; ++j) | |
- lcp_writer->write(ans_buf[j].m_ans); | |
-#else | |
- for (std::uint64_t j = 0; j < filled; ++j) { | |
- std::uint64_t i = idx_buf[2 * j]; | |
- std::uint64_t phi_i = idx_buf[2 * j + 1]; | |
- std::uint64_t left_idx = i; | |
- std::uint64_t right_idx = phi_i; | |
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end && | |
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end)) | |
- std::swap(left_idx, right_idx); | |
- | |
- // Compute LCP value. | |
- std::uint64_t lcp = 0; | |
- while (left_idx + lcp < left_halfsegment_ext_end && right_idx + lcp < right_halfsegment_ext_end && | |
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] == right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp]) | |
- ++lcp; | |
- | |
- // Finish the computation of long LCP using naive method. | |
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) || | |
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length)) | |
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol); | |
- | |
- // Write LCP to file. | |
- std::uint64_t pos_in_B = 2 * i + lcp; | |
- lcp_writer->write(pos_in_B); | |
- local_lcp_sum += lcp; | |
- } | |
-#endif | |
- | |
- pairs_processed += filled; | |
- } | |
- | |
- // Update I/O volume. | |
- io_vol += pair_reader->bytes_read() + extra_io + n_pairs * sizeof(ext_text_offset_type); | |
- total_io_volume += io_vol; | |
- | |
- // Clean up. | |
- delete pair_reader; | |
- utils::file_delete(pairs_filename); | |
- | |
- // Update statistics. | |
- sum_irreducible_lcps += local_lcp_sum; | |
- | |
- // Print summary. | |
- long double avg_lcp = (long double)local_lcp_sum / (long double)std::max(1UL, n_pairs); | |
- long double elapsed = utils::wclock() - halfsegment_process_start; | |
- fprintf(stderr, "time = %.1Lfs, I/O = %.1LfMiB/s, avg_lcp = %.2Lf, total I/O vol = %.2Lfn\n", elapsed, | |
- (1.L * io_vol / (1L << 20)) / elapsed, avg_lcp, (1.L * total_io_volume) / text_length); | |
- } | |
- } | |
- | |
- // Clean up. | |
- delete[] idx_buf; | |
-#ifdef _OPENMP | |
- delete[] ans_buf; | |
-#endif | |
- delete lcp_writer; | |
- std::fclose(f_text); | |
- free(left_halfsegment); | |
- free(right_halfsegment); | |
- | |
- // Print summary. | |
- long double total_time = utils::wclock() - start; | |
- fprintf(stderr, " Total time: %.2Lfs, total I/O vol = %.2Lfn\n", | |
- total_time, (1.L * total_io_volume) / text_length); | |
- | |
- return sum_irreducible_lcps; | |
-} | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_PROCESS_HALFSEGMENT_PAIRS_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/set_bits.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/set_bits.hpp | |
deleted file mode 100644 | |
index 1168c5dd..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/set_bits.hpp | |
+++ /dev/null | |
@@ -1,219 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/set_bits.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_SET_BITS_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_SET_BITS_HPP_INCLUDED | |
- | |
-#include <cstdint> | |
-#include <vector> | |
-#include <algorithm> | |
-#include <omp.h> | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
- | |
-void set_bits(std::uint64_t *bv, std::uint64_t *tab, std::uint64_t tab_size) { | |
- for (std::uint64_t i = 0; i < tab_size; ++i) { | |
- std::uint64_t idx = tab[i]; | |
- bv[idx >> 6] |= (1UL << (idx & 63)); | |
- } | |
-} | |
- | |
-#ifdef _OPENMP | |
-template<typename int_type> | |
-void permute_into_small_buckets(int_type *tab, | |
- int_type *temp, std::uint64_t length, | |
- std::uint64_t lower_bound, std::uint64_t upper_bound, | |
- std::uint64_t max_bucket_size, | |
- std::vector<std::uint64_t> &output_bucket_sizes) { | |
- // Move all items into temp array. | |
- #pragma omp parallel for | |
- for (std::uint64_t j = 0; j < length; ++j) | |
- temp[j] = tab[j]; | |
- | |
- // Compute bucket range. Note that bucket range is understood | |
- // as the length of ranges of keys assigned to a bucket; | |
- // bucket size is the number of items inside the bucket. | |
- static const std::uint64_t max_buckets = 1024; | |
- std::uint64_t value_range = upper_bound - lower_bound; | |
- std::uint64_t bucket_range_log = 6; | |
- std::uint64_t bucket_range = 64; | |
- while ((value_range + bucket_range - 1) / bucket_range > max_buckets) { | |
- ++bucket_range_log; | |
- bucket_range <<= 1; | |
- } | |
- std::uint64_t n_buckets = (value_range + bucket_range - 1) / bucket_range; | |
- | |
- // Allocate bucket counts. | |
- std::uint64_t max_threads = omp_get_max_threads(); | |
- std::uint64_t max_range_size = (length + max_threads - 1) / max_threads; | |
- std::uint64_t n_threads = (length + max_range_size - 1) / max_range_size; | |
- std::uint64_t **bucket_ptr = new std::uint64_t*[n_threads]; | |
- for (std::uint64_t thread_id = 0; thread_id < n_threads; ++thread_id) | |
- bucket_ptr[thread_id] = new std::uint64_t[n_buckets]; | |
- | |
- // Permute items into buckets. | |
- #pragma omp parallel num_threads(n_threads) | |
- { | |
- std::uint64_t thread_id = omp_get_thread_num(); | |
- std::uint64_t range_beg = thread_id * max_range_size; | |
- std::uint64_t range_end = std::min(range_beg + max_range_size, length); | |
- std::uint64_t *local_bucket_ptr = bucket_ptr[thread_id]; | |
- std::fill(local_bucket_ptr, local_bucket_ptr + n_buckets, 0UL); | |
- | |
- // Compute bucket counts. | |
- for (std::uint64_t j = range_beg; j < range_end; ++j) { | |
- std::uint64_t bucket_id = (((std::uint64_t)tab[j] - lower_bound) >> bucket_range_log); | |
- ++local_bucket_ptr[bucket_id]; | |
- } | |
- | |
- // Compute destination pointers. | |
- #pragma omp barrier | |
- #pragma omp single | |
- { | |
- std::uint64_t total_buckets_size = 0; | |
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) { | |
- std::uint64_t this_bucket_size = 0; | |
- for (std::uint64_t i = 0; i < n_threads; ++i) { | |
- std::uint64_t local_bucket_size = bucket_ptr[i][bucket_id]; | |
- bucket_ptr[i][bucket_id] = total_buckets_size + this_bucket_size; | |
- this_bucket_size += local_bucket_size; | |
- } | |
- total_buckets_size += this_bucket_size; | |
- } | |
- } | |
- | |
- // Move items into buckets. | |
- for (std::uint64_t j = range_beg; j < range_end; ++j) { | |
- std::uint64_t bucket_id = ((temp[j] - lower_bound) >> bucket_range_log); | |
- std::uint64_t dest_pos = local_bucket_ptr[bucket_id]++; | |
- tab[dest_pos] = temp[j]; | |
- } | |
- } | |
- | |
- // Free the memory for bucket_ptr. Keep only bucket sizes. | |
- std::vector<std::uint64_t> unrefined_bucket_sizes(n_buckets); | |
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) { | |
- unrefined_bucket_sizes[bucket_id] = bucket_ptr[n_threads - 1][bucket_id]; | |
- if (bucket_id > 0) | |
- unrefined_bucket_sizes[bucket_id] -= bucket_ptr[n_threads - 1][bucket_id - 1]; | |
- } | |
- for (std::uint64_t thread_id = 0; thread_id < n_threads; ++thread_id) | |
- delete[] bucket_ptr[thread_id]; | |
- delete[] bucket_ptr; | |
- | |
- // Compute the output. If necessary, refine large buckets recursively. | |
- std::uint64_t cur_bucket_beg = 0; | |
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) { | |
- if (unrefined_bucket_sizes[bucket_id] > max_bucket_size) { | |
- std::uint64_t lower_bound_rec = lower_bound + bucket_id * bucket_range; | |
- std::uint64_t upper_bound_rec = std::min(lower_bound_rec + bucket_range, upper_bound); | |
- permute_into_small_buckets(tab + cur_bucket_beg, temp, unrefined_bucket_sizes[bucket_id], | |
- lower_bound_rec, upper_bound_rec, max_bucket_size, output_bucket_sizes); | |
- } else output_bucket_sizes.push_back(unrefined_bucket_sizes[bucket_id]); | |
- cur_bucket_beg += unrefined_bucket_sizes[bucket_id]; | |
- } | |
-} | |
- | |
-template<typename int_type> | |
-void set_bits(std::uint64_t *bv, | |
- std::uint64_t bv_size, int_type *tab, | |
- std::uint64_t tab_size, int_type *temp) { | |
- std::uint64_t max_threads = omp_get_max_threads(); | |
- | |
- // Partition the input array into buckets. | |
- std::vector<std::uint64_t> bucket_sizes; | |
- { | |
- // First, partition the input array into small buckets. There may be | |
- // a lot of them, so they need to be merged into larger buckets. | |
- std::vector<std::uint64_t> small_bucket_sizes; | |
- std::uint64_t ideal_bucket_size = std::max(512UL, (tab_size + max_threads - 1) / max_threads); | |
- std::uint64_t max_bucket_size = 2UL * ideal_bucket_size; | |
- permute_into_small_buckets(tab, temp, tab_size, 0, | |
- bv_size, max_bucket_size, small_bucket_sizes); | |
- | |
- // Merge small buckets into at most max_threads final buckets. | |
- std::uint64_t n_small_buckets = small_bucket_sizes.size(); | |
- std::uint64_t small_bucket_ptr = 0; | |
- for (std::uint64_t bucket_id = 0; bucket_id < max_threads; ++bucket_id) { | |
- if (small_bucket_ptr < n_small_buckets) { | |
- std::uint64_t cur_bucket_total_size = small_bucket_sizes[small_bucket_ptr++]; | |
- std::uint64_t cur_bucket_range_end = small_bucket_ptr; | |
- | |
- // Keep adding buckets as long as we are | |
- // getting closer to the ideal bucket size. | |
- while (cur_bucket_range_end < n_small_buckets && (std::abs((std::int64_t)(cur_bucket_total_size + | |
- small_bucket_sizes[cur_bucket_range_end]) - (std::int64_t)ideal_bucket_size) <= | |
- std::abs((std::int64_t)cur_bucket_total_size - (std::int64_t)ideal_bucket_size) || | |
- (bucket_id + 1 == max_threads))) | |
- cur_bucket_total_size += small_bucket_sizes[cur_bucket_range_end++]; | |
- | |
- // Add the final bucket to the list. | |
- bucket_sizes.push_back(cur_bucket_total_size); | |
- small_bucket_ptr = cur_bucket_range_end; | |
- } | |
- } | |
- } | |
- | |
- // Update the bits in bv. The above partitioning guarantees that | |
- // no thread will attempt to update the same word in bv and | |
- // that all threads will update roughly the same amount of bits. | |
- // Lastly, the above guarantees bucket_sizes.size() <= max_threads. | |
- { | |
- // Partial (exclusive) sum over bucket_sizes. | |
- std::uint64_t total_bucket_size = 0; | |
- std::uint64_t n_buckets = bucket_sizes.size(); | |
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) { | |
- std::uint64_t this_bucket_size = bucket_sizes[bucket_id]; | |
- bucket_sizes[bucket_id] = total_bucket_size; | |
- total_bucket_size += this_bucket_size; | |
- } | |
- | |
- // Set the bits in the bitvector. | |
- #pragma omp parallel num_threads(n_buckets) | |
- { | |
- std::uint64_t bucket_id = omp_get_thread_num(); | |
- std::uint64_t bucket_beg = bucket_sizes[bucket_id]; | |
- std::uint64_t bucket_end = (bucket_id + 1 == n_buckets) ? total_bucket_size : bucket_sizes[bucket_id + 1]; | |
- for (std::uint64_t j = bucket_beg; j < bucket_end; ++j) { | |
- std::uint64_t bv_idx = tab[j]; | |
- bv[bv_idx >> 6] |= (1UL << (bv_idx & 63)); | |
- } | |
- } | |
- } | |
-} | |
-#endif | |
- | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_SET_BITS_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.cpp | |
deleted file mode 100644 | |
index 35e57e0d..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.cpp | |
+++ /dev/null | |
@@ -1,180 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/utils.cpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstdint> | |
-#include <cstring> | |
-#include <cerrno> | |
-#include <sys/time.h> | |
-#include <sys/types.h> | |
-#include <sys/stat.h> | |
-#include <fcntl.h> | |
-#include <unistd.h> | |
-#include <string> | |
-#include <fstream> | |
-#include <algorithm> | |
- | |
-#include "utils.hpp" | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
-namespace utils { | |
- | |
-long double wclock() { | |
- timeval tim; | |
- gettimeofday(&tim, NULL); | |
- return tim.tv_sec + (tim.tv_usec / 1000000.0L); | |
-} | |
- | |
-std::FILE *file_open(std::string filename, std::string mode) { | |
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str()); | |
- if (f == NULL) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- return f; | |
-} | |
- | |
-std::FILE *file_open_nobuf(std::string filename, std::string mode) { | |
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str()); | |
- if (f == NULL) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- if(std::setvbuf(f, NULL, _IONBF, 0) != 0) { | |
- perror("setvbuf failed"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- return f; | |
-} | |
- | |
-std::uint64_t file_size(std::string filename) { | |
- std::FILE *f = file_open_nobuf(filename, "r"); | |
- std::fseek(f, 0, SEEK_END); | |
- long size = std::ftell(f); | |
- if (size < 0) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- std::fclose(f); | |
- return (std::uint64_t)size; | |
-} | |
- | |
-bool file_exists(std::string filename) { | |
- std::FILE *f = std::fopen(filename.c_str(), "r"); | |
- bool result = (f != NULL); | |
- if (f != NULL) | |
- std::fclose(f); | |
- return result; | |
-} | |
- | |
-void file_delete(std::string filename) { | |
- int res = std::remove(filename.c_str()); | |
- if (res != 0) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
-} | |
- | |
-std::string absolute_path(std::string filename) { | |
- char path[1 << 12]; | |
- bool created = false; | |
- if (!file_exists(filename)) { | |
- std::fclose(file_open(filename, "w")); | |
- created = true; | |
- } | |
- if (!realpath(filename.c_str(), path)) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- if (created) | |
- file_delete(filename); | |
- return std::string(path); | |
-} | |
- | |
-void drop_disk_pages(std::string filename) { | |
- int fd = open(filename.c_str(), O_RDWR); | |
- if (fd == -1) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- off_t length = lseek(fd, 0, SEEK_END); | |
- lseek(fd, 0L, SEEK_SET); | |
- posix_fadvise(fd, 0, length, POSIX_FADV_DONTNEED); | |
- close(fd); | |
-} | |
- | |
-std::int32_t random_int32(std::int32_t p, std::int32_t r) { | |
- return p + rand() % (r - p + 1); | |
-} | |
- | |
-std::int64_t random_int64(std::int64_t p, std::int64_t r) { | |
- std::int64_t x = random_int32(0, 1000000000); | |
- std::int64_t y = random_int32(0, 1000000000); | |
- std::int64_t z = x * 1000000000L + y; | |
- return p + z % (r - p + 1); | |
-} | |
- | |
-void fill_random_string(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma) { | |
- for (std::uint64_t i = 0; i < length; ++i) | |
- s[i] = random_int32(0, sigma - 1); | |
-} | |
- | |
-void fill_random_letters(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma) { | |
- fill_random_string(s, length, sigma); | |
- for (std::uint64_t i = 0; i < length; ++i) | |
- s[i] += 'a'; | |
-} | |
- | |
-std::string random_string_hash() { | |
- uint64_t hash = (uint64_t)rand() * RAND_MAX + rand(); | |
- std::stringstream ss; | |
- ss << hash; | |
- return ss.str(); | |
-} | |
- | |
-std::uint64_t log2ceil(std::uint64_t x) { | |
- std::uint64_t pow2 = 1, w = 0; | |
- while (pow2 < x) { pow2 <<= 1; ++w; } | |
- return w; | |
-} | |
- | |
-std::uint64_t log2floor(std::uint64_t x) { | |
- std::uint64_t pow2 = 1, w = 0; | |
- while ((pow2 << 1) <= x) { pow2 <<= 1; ++w; } | |
- return w; | |
-} | |
- | |
-} // namespace utils | |
-} // namespace em_succinct_irreducible_private | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.hpp | |
deleted file mode 100644 | |
index 4429713e..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.hpp | |
+++ /dev/null | |
@@ -1,123 +0,0 @@ | |
-/** | |
- * @file em_succinct_irreducible_src/utils.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_UTILS_HPP_INCLUDED | |
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_UTILS_HPP_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstdint> | |
-#include <string> | |
-#include <sstream> | |
- | |
- | |
-namespace em_succinct_irreducible_private { | |
-namespace utils { | |
- | |
-long double wclock(); | |
- | |
-std::FILE *file_open(std::string fname, std::string mode); | |
-std::FILE *file_open_nobuf(std::string fname, std::string mode); | |
-std::uint64_t file_size(std::string fname); | |
-bool file_exists(std::string fname); | |
-void file_delete(std::string fname); | |
-std::string absolute_path(std::string fname); | |
-void drop_disk_pages(std::string filename); | |
- | |
-template<typename value_type> | |
-void write_to_file(const value_type *src, std::uint64_t length, std::FILE *f) { | |
- std::uint64_t fwrite_ret = std::fwrite(src, sizeof(value_type), length, f); | |
- if (fwrite_ret != length) { | |
- fprintf(stderr, "\nError: fwrite failed.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
-} | |
- | |
-template<typename value_type> | |
-void write_to_file(const value_type *src, std::uint64_t length, std::string fname) { | |
- std::FILE *f = file_open_nobuf(fname, "w"); | |
- write_to_file(src, length, f); | |
- std::fclose(f); | |
-} | |
- | |
-template<typename value_type> | |
-void read_from_file(value_type* dest, std::uint64_t length, std::FILE *f) { | |
- std::uint64_t fread_ret = std::fread(dest, sizeof(value_type), length, f); | |
- if (fread_ret != length) { | |
- fprintf(stderr, "\nError: fread failed.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
-} | |
- | |
-template<typename value_type> | |
-void read_from_file(value_type* dest, std::uint64_t length, std::string fname) { | |
- std::FILE *f = file_open_nobuf(fname, "r"); | |
- read_from_file<value_type>(dest, length, f); | |
- std::fclose(f); | |
-} | |
- | |
-template<typename value_type> | |
-void read_at_offset(value_type *dest, std::uint64_t offset, | |
- std::uint64_t length, std::FILE *f) { | |
- std::fseek(f, sizeof(value_type) * offset, SEEK_SET); | |
- read_from_file(dest, length, f); | |
-} | |
- | |
-template<typename value_type> | |
-void read_at_offset(value_type *dest, std::uint64_t offset, | |
- std::uint64_t length, std::string filename) { | |
- std::FILE *f = file_open_nobuf(filename, "r"); | |
- read_at_offset(dest, offset, length, f); | |
- std::fclose(f); | |
-} | |
- | |
-std::int32_t random_int32(std::int32_t p, std::int32_t r); | |
-std::int64_t random_int64(std::int64_t p, std::int64_t r); | |
-void fill_random_string(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma); | |
-void fill_random_letters(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma); | |
-std::string random_string_hash(); | |
- | |
-std::uint64_t log2ceil(std::uint64_t x); | |
-std::uint64_t log2floor(std::uint64_t x); | |
- | |
-template<typename int_type> | |
-std::string intToStr(int_type x) { | |
- std::stringstream ss; | |
- ss << x; | |
- return ss.str(); | |
-} | |
- | |
-} // namespace utils | |
-} // namespace em_succinct_irreducible_private | |
- | |
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_UTILS_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_array.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_array.cpp | |
deleted file mode 100644 | |
index 7d4557f8..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_array.cpp | |
+++ /dev/null | |
@@ -1,236 +0,0 @@ | |
-/** | |
- * @file main.cpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstdint> | |
-#include <ctime> | |
-#include <string> | |
-#include <getopt.h> | |
-#include <unistd.h> | |
- | |
-#include "uint40.hpp" | |
-#include "uint48.hpp" | |
-#include "em_succinct_irreducible_src/compute_lcp_array.hpp" | |
- | |
-char *program_name; | |
- | |
-void usage(int status) { | |
- printf( | |
- | |
-"Usage: %s [OPTION]... FILE\n" | |
-"Construct the LCP array for text stored in FILE.\n" | |
-"\n" | |
-"Mandatory arguments to long options are mandatory for short options too.\n" | |
-" -b, --bwt=BWTFILE specify the location of the Burrows-Wheeler\n" | |
-" transform of FILE (default: FILE.bwt)\n" | |
-" -h, --help display this help and exit\n" | |
-" -i, --intsize=SIZE use integers of SIZE bytes (default: 5). Currently\n" | |
-" supported values are 4, 5, 6, and 8\n" | |
-" -m, --mem=MEM use MEM MiB of RAM for computation (default: 3584)\n" | |
-" -o, --output=OUTFILE specify output filename (default: FILE.lcpX, where\n" | |
-" X = integer size, see the -i flag)\n" | |
-" -s, --sa=SUFARRAY specify the location of the suffix array of FILE\n" | |
-" (default: FILE.saX, X = integer size, see -i flag)\n", | |
- program_name); | |
- | |
- std::exit(status); | |
-} | |
- | |
-bool file_exists(std::string filename) { | |
- std::FILE *f = std::fopen(filename.c_str(), "r"); | |
- bool ret = (f != NULL); | |
- if (f != NULL) std::fclose(f); | |
- | |
- return ret; | |
-} | |
- | |
-std::FILE *file_open(std::string filename, std::string mode) { | |
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str()); | |
- if (f == NULL) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- return f; | |
-} | |
- | |
-std::uint64_t file_size(std::string filename) { | |
- std::FILE *f = file_open(filename, "r"); | |
- std::fseek(f, 0, SEEK_END); | |
- long size = std::ftell(f); | |
- if (size < 0) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- std::fclose(f); | |
- return (std::uint64_t)size; | |
-} | |
- | |
-template<typename int_type> | |
-std::string intToStr(int_type x) { | |
- std::stringstream ss; | |
- ss << x; | |
- return ss.str(); | |
-} | |
- | |
-template<typename text_offset_type> | |
-void compute_lcp_array(std::string text_filename, std::string sa_filename, | |
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) { | |
- std::uint64_t text_length = file_size(text_filename); | |
- if (2UL * text_length <= std::numeric_limits<text_offset_type>::max()) { | |
- em_succinct_irreducible_private::compute_lcp_array<text_offset_type, text_offset_type>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- } else { | |
- if (sizeof(text_offset_type) < 4) em_succinct_irreducible_private::compute_lcp_array<text_offset_type, std::uint32_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- else if (sizeof(text_offset_type) == 4) em_succinct_irreducible_private::compute_lcp_array<text_offset_type, uint40>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- else if (sizeof(text_offset_type) == 5) em_succinct_irreducible_private::compute_lcp_array<text_offset_type, uint48>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- else em_succinct_irreducible_private::compute_lcp_array<text_offset_type, std::uint64_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- } | |
-} | |
- | |
-int main(int argc, char **argv) { | |
- srand(time(0) + getpid()); | |
- program_name = argv[0]; | |
- | |
- static struct option long_options[] = { | |
- {"bwt", required_argument, NULL, 'b'}, | |
- {"help", no_argument, NULL, 'h'}, | |
- {"intsize", required_argument, NULL, 'i'}, | |
- {"mem", required_argument, NULL, 'm'}, | |
- {"output", required_argument, NULL, 'o'}, | |
- {"sa", required_argument, NULL, 's'}, | |
- {NULL, 0, NULL, 0} | |
- }; | |
- | |
- std::uint64_t int_size = 5; | |
- std::uint64_t ram_use = 3584UL << 20; | |
- std::string out_filename(""); | |
- std::string sa_filename(""); | |
- std::string bwt_filename(""); | |
- | |
- // Parse command-line options. | |
- int c; | |
- while ((c = getopt_long(argc, argv, "b:hi:m:o:s:", long_options, NULL)) != -1) { | |
- switch(c) { | |
- case 'b': | |
- bwt_filename = std::string(optarg); | |
- break; | |
- case 'h': | |
- usage(EXIT_FAILURE); | |
- case 'i': | |
- int_size = std::atol(optarg); | |
- if (!(int_size == 4 || int_size == 5 || int_size == 6 || int_size == 8)) { | |
- fprintf(stderr, "Error: invalid int size (%lu)\n\n", int_size); | |
- usage(EXIT_FAILURE); | |
- } | |
- break; | |
- case 'm': | |
- ram_use = std::atol(optarg) << 20; | |
- if (ram_use == 0) { | |
- fprintf(stderr, "Error: invalid RAM limit (%lu)\n\n", ram_use); | |
- usage(EXIT_FAILURE); | |
- } | |
- break; | |
- case 'o': | |
- out_filename = std::string(optarg); | |
- break; | |
- case 's': | |
- sa_filename = std::string(optarg); | |
- break; | |
- default: | |
- usage(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- if (optind >= argc) { | |
- fprintf(stderr, "Error: FILE not provided\n\n"); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- // Parse the text filename. | |
- std::string text_filename = std::string(argv[optind++]); | |
- if (optind < argc) { | |
- fprintf(stderr, "Warning: multiple input files provided. " | |
- "Only the first will be processed.\n"); | |
- } | |
- | |
- // Set default filenames (if not provided). | |
- if (sa_filename.empty()) sa_filename = text_filename + ".sa" + intToStr(int_size); | |
- if (out_filename.empty()) out_filename = text_filename + ".lcp" + intToStr(int_size); | |
- if (bwt_filename.empty()) bwt_filename = text_filename + ".bwt"; | |
- | |
- // Check if input text, suffix array, and BWT exist. | |
- if (!file_exists(text_filename)) { | |
- fprintf(stderr, "Error: input file (%s) does not exist\n\n", | |
- text_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- if (!file_exists(sa_filename)) { | |
- fprintf(stderr, "Error: suffix array (%s) does not exist\n\n", | |
- sa_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- if (!file_exists(bwt_filename)) { | |
- fprintf(stderr, "Error: BWT of input text (%s) does not exist\n\n", | |
- bwt_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- if (file_exists(out_filename)) { | |
- // Output file exists, should we proceed? | |
- char *line = NULL; | |
- std::uint64_t buflen = 0; | |
- std::int64_t len = 0L; | |
- | |
- do { | |
- printf("Output file (%s) exists. Overwrite? [y/n]: ", | |
- out_filename.c_str()); | |
- if ((len = getline(&line, &buflen, stdin)) == -1) { | |
- printf("\nError: failed to read answer\n\n"); | |
- std::fflush(stdout); | |
- usage(EXIT_FAILURE); | |
- } | |
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n')); | |
- | |
- if (line[0] == 'n') { | |
- free(line); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- free(line); | |
- } | |
- | |
- // Run the algorithm. | |
- if (int_size == 4) compute_lcp_array<std::uint32_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
- else if (int_size == 5) compute_lcp_array<uint40>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
- else if (int_size == 6) compute_lcp_array<uint48>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
- else compute_lcp_array<std::uint64_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
-} | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_from_plcp.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_from_plcp.cpp | |
deleted file mode 100644 | |
index c3cab4b1..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_from_plcp.cpp | |
+++ /dev/null | |
@@ -1,188 +0,0 @@ | |
-/** | |
- * @file main.cpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstdint> | |
-#include <ctime> | |
-#include <string> | |
-#include <getopt.h> | |
-#include <unistd.h> | |
- | |
-#include "uint40.hpp" | |
-#include "uint48.hpp" | |
-#include "em_succinct_irreducible_src/compute_lcp_from_plcp.hpp" | |
- | |
-char *program_name; | |
- | |
-void usage(int status) { | |
- printf( | |
- | |
-"Usage: %s [OPTION]... FILE\n" | |
-"Convert PLCP array (bitvector representation) stored in FILE to LCP array.\n" | |
-"\n" | |
-"Mandatory arguments to long options are mandatory for short options too.\n" | |
-" -h, --help display this help and exit\n" | |
-" -i, --intsize=SIZE use integers of SIZE bytes (default: 5). Currently\n" | |
-" supported values are 4, 5, 6, and 8\n" | |
-" -m, --mem=MEM use MEM MiB of RAM for computation (default: 3584)\n" | |
-" -o, --output=OUTFILE specify output filename (default: FILE.lcpX, where\n" | |
-" X = integer size, see the -i flag)\n" | |
-" -s, --sa=SUFARRAY specify the location of the suffix array of FILE\n" | |
-" (default: FILE.saX, X = integer size, see -i flag)\n", | |
- program_name); | |
- | |
- std::exit(status); | |
-} | |
- | |
-bool file_exists(std::string filename) { | |
- std::FILE *f = std::fopen(filename.c_str(), "r"); | |
- bool ret = (f != NULL); | |
- if (f != NULL) std::fclose(f); | |
- | |
- return ret; | |
-} | |
- | |
-template<typename int_type> | |
-std::string intToStr(int_type x) { | |
- std::stringstream ss; | |
- ss << x; | |
- return ss.str(); | |
-} | |
- | |
-int main(int argc, char **argv) { | |
- srand(time(0) + getpid()); | |
- program_name = argv[0]; | |
- | |
- static struct option long_options[] = { | |
- {"help", no_argument, NULL, 'h'}, | |
- {"intsize", required_argument, NULL, 'i'}, | |
- {"mem", required_argument, NULL, 'm'}, | |
- {"output", required_argument, NULL, 'o'}, | |
- {"sa", required_argument, NULL, 's'}, | |
- {NULL, 0, NULL, 0} | |
- }; | |
- | |
- std::uint64_t int_size = 5; | |
- std::uint64_t ram_use = 3584UL << 20; | |
- std::string out_filename(""); | |
- std::string sa_filename(""); | |
- | |
- // Parse command-line options. | |
- int c; | |
- while ((c = getopt_long(argc, argv, "hi:m:o:s:", long_options, NULL)) != -1) { | |
- switch(c) { | |
- case 'h': | |
- usage(EXIT_FAILURE); | |
- case 'i': | |
- int_size = std::atol(optarg); | |
- if (!(int_size == 4 || int_size == 5 || int_size == 6 || int_size == 8)) { | |
- fprintf(stderr, "Error: invalid int size (%lu)\n\n", int_size); | |
- usage(EXIT_FAILURE); | |
- } | |
- break; | |
- case 'm': | |
- ram_use = std::atol(optarg) << 20; | |
- if (ram_use == 0) { | |
- fprintf(stderr, "Error: invalid RAM limit (%lu)\n\n", ram_use); | |
- usage(EXIT_FAILURE); | |
- } | |
- break; | |
- case 'o': | |
- out_filename = std::string(optarg); | |
- break; | |
- case 's': | |
- sa_filename = std::string(optarg); | |
- break; | |
- default: | |
- usage(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- if (optind >= argc) { | |
- fprintf(stderr, "Error: FILE not provided\n\n"); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- // Parse the text filename. | |
- std::string input_filename = std::string(argv[optind++]); | |
- if (optind < argc) { | |
- fprintf(stderr, "Warning: multiple input files provided. " | |
- "Only the first will be processed.\n"); | |
- } | |
- | |
- // Set default filenames (if not provided). | |
- if (sa_filename.empty()) sa_filename = input_filename + ".sa" + intToStr(int_size); | |
- if (out_filename.empty()) out_filename = input_filename + ".lcp" + intToStr(int_size); | |
- | |
- // Check if input text, suffix array, and BWT exist. | |
- if (!file_exists(input_filename)) { | |
- fprintf(stderr, "Error: input file (%s) does not exist\n\n", | |
- input_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- if (!file_exists(sa_filename)) { | |
- fprintf(stderr, "Error: suffix array (%s) does not exist\n\n", | |
- sa_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- if (file_exists(out_filename)) { | |
- // Output file exists, should we proceed? | |
- char *line = NULL; | |
- std::uint64_t buflen = 0; | |
- std::int64_t len = 0L; | |
- | |
- do { | |
- printf("Output file (%s) exists. Overwrite? [y/n]: ", | |
- out_filename.c_str()); | |
- if ((len = getline(&line, &buflen, stdin)) == -1) { | |
- printf("\nError: failed to read answer\n\n"); | |
- std::fflush(stdout); | |
- usage(EXIT_FAILURE); | |
- } | |
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n')); | |
- | |
- if (line[0] == 'n') { | |
- free(line); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- free(line); | |
- } | |
- | |
- // Run the algorithm. | |
- if (int_size == 4) em_succinct_irreducible_private::compute_lcp_from_plcp<std::uint32_t>(input_filename, sa_filename, out_filename, ram_use); | |
- else if (int_size == 5) em_succinct_irreducible_private::compute_lcp_from_plcp<uint40>(input_filename, sa_filename, out_filename, ram_use); | |
- else if (int_size == 6) em_succinct_irreducible_private::compute_lcp_from_plcp<uint48>(input_filename, sa_filename, out_filename, ram_use); | |
- else em_succinct_irreducible_private::compute_lcp_from_plcp<std::uint64_t>(input_filename, sa_filename, out_filename, ram_use); | |
-} | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_plcp_bitvector.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_plcp_bitvector.cpp | |
deleted file mode 100644 | |
index cafbb51d..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_plcp_bitvector.cpp | |
+++ /dev/null | |
@@ -1,235 +0,0 @@ | |
-/** | |
- * @file main.cpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstdint> | |
-#include <ctime> | |
-#include <string> | |
-#include <getopt.h> | |
-#include <unistd.h> | |
- | |
-#include "uint40.hpp" | |
-#include "uint48.hpp" | |
-#include "em_succinct_irreducible_src/compute_plcp_bitvector.hpp" | |
- | |
-char *program_name; | |
- | |
-void usage(int status) { | |
- printf( | |
- | |
-"Usage: %s [OPTION]... FILE\n" | |
-"Construct the PLCP array (bitvector representation) for text stored in FILE.\n" | |
-"\n" | |
-"Mandatory arguments to long options are mandatory for short options too.\n" | |
-" -b, --bwt=BWTFILE specify the location of the Burrows-Wheeler\n" | |
-" transform of FILE (default: FILE.bwt)\n" | |
-" -h, --help display this help and exit\n" | |
-" -i, --intsize=SIZE use integers of SIZE bytes (default: 5). Currently\n" | |
-" supported values are 4, 5, 6, and 8\n" | |
-" -m, --mem=MEM use MEM MiB of RAM for computation (default: 3584)\n" | |
-" -o, --output=OUTFILE specify output filename (default: FILE.plcp)\n" | |
-" -s, --sa=SUFARRAY specify the location of the suffix array of FILE\n" | |
-" (default: FILE.saX, X = integer size, see -i flag)\n", | |
- program_name); | |
- | |
- std::exit(status); | |
-} | |
- | |
-bool file_exists(std::string filename) { | |
- std::FILE *f = std::fopen(filename.c_str(), "r"); | |
- bool ret = (f != NULL); | |
- if (f != NULL) std::fclose(f); | |
- | |
- return ret; | |
-} | |
- | |
-std::FILE *file_open(std::string filename, std::string mode) { | |
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str()); | |
- if (f == NULL) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- return f; | |
-} | |
- | |
-std::uint64_t file_size(std::string filename) { | |
- std::FILE *f = file_open(filename, "r"); | |
- std::fseek(f, 0, SEEK_END); | |
- long size = std::ftell(f); | |
- if (size < 0) { | |
- std::perror(filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- std::fclose(f); | |
- return (std::uint64_t)size; | |
-} | |
- | |
-template<typename int_type> | |
-std::string intToStr(int_type x) { | |
- std::stringstream ss; | |
- ss << x; | |
- return ss.str(); | |
-} | |
- | |
-template<typename text_offset_type> | |
-void compute_plcp_bitvector(std::string text_filename, std::string sa_filename, | |
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) { | |
- std::uint64_t text_length = file_size(text_filename); | |
- if (2UL * text_length <= std::numeric_limits<text_offset_type>::max()) { | |
- em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, text_offset_type>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- } else { | |
- if (sizeof(text_offset_type) < 4) em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, std::uint32_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- else if (sizeof(text_offset_type) == 4) em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, uint40>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- else if (sizeof(text_offset_type) == 5) em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, uint48>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- else em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, std::uint64_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use); | |
- } | |
-} | |
- | |
-int main(int argc, char **argv) { | |
- srand(time(0) + getpid()); | |
- program_name = argv[0]; | |
- | |
- static struct option long_options[] = { | |
- {"bwt", required_argument, NULL, 'b'}, | |
- {"help", no_argument, NULL, 'h'}, | |
- {"intsize", required_argument, NULL, 'i'}, | |
- {"mem", required_argument, NULL, 'm'}, | |
- {"output", required_argument, NULL, 'o'}, | |
- {"sa", required_argument, NULL, 's'}, | |
- {NULL, 0, NULL, 0} | |
- }; | |
- | |
- std::uint64_t int_size = 5; | |
- std::uint64_t ram_use = 3584UL << 20; | |
- std::string out_filename(""); | |
- std::string sa_filename(""); | |
- std::string bwt_filename(""); | |
- | |
- // Parse command-line options. | |
- int c; | |
- while ((c = getopt_long(argc, argv, "b:hi:m:o:s:", long_options, NULL)) != -1) { | |
- switch(c) { | |
- case 'b': | |
- bwt_filename = std::string(optarg); | |
- break; | |
- case 'h': | |
- usage(EXIT_FAILURE); | |
- case 'i': | |
- int_size = std::atol(optarg); | |
- if (!(int_size == 4 || int_size == 5 || int_size == 6 || int_size == 8)) { | |
- fprintf(stderr, "Error: invalid int size (%lu)\n\n", int_size); | |
- usage(EXIT_FAILURE); | |
- } | |
- break; | |
- case 'm': | |
- ram_use = std::atol(optarg) << 20; | |
- if (ram_use == 0) { | |
- fprintf(stderr, "Error: invalid RAM limit (%lu)\n\n", ram_use); | |
- usage(EXIT_FAILURE); | |
- } | |
- break; | |
- case 'o': | |
- out_filename = std::string(optarg); | |
- break; | |
- case 's': | |
- sa_filename = std::string(optarg); | |
- break; | |
- default: | |
- usage(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- if (optind >= argc) { | |
- fprintf(stderr, "Error: FILE not provided\n\n"); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- // Parse the text filename. | |
- std::string text_filename = std::string(argv[optind++]); | |
- if (optind < argc) { | |
- fprintf(stderr, "Warning: multiple input files provided. " | |
- "Only the first will be processed.\n"); | |
- } | |
- | |
- // Set default filenames (if not provided). | |
- if (sa_filename.empty()) sa_filename = text_filename + ".sa" + intToStr(int_size); | |
- if (out_filename.empty()) out_filename = text_filename + ".plcp"; | |
- if (bwt_filename.empty()) bwt_filename = text_filename + ".bwt"; | |
- | |
- // Check if input text, suffix array, and BWT exist. | |
- if (!file_exists(text_filename)) { | |
- fprintf(stderr, "Error: input file (%s) does not exist\n\n", | |
- text_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- if (!file_exists(sa_filename)) { | |
- fprintf(stderr, "Error: suffix array (%s) does not exist\n\n", | |
- sa_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- if (!file_exists(bwt_filename)) { | |
- fprintf(stderr, "Error: BWT of input text (%s) does not exist\n\n", | |
- bwt_filename.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- if (file_exists(out_filename)) { | |
- // Output file exists, should we proceed? | |
- char *line = NULL; | |
- std::uint64_t buflen = 0; | |
- std::int64_t len = 0L; | |
- | |
- do { | |
- printf("Output file (%s) exists. Overwrite? [y/n]: ", | |
- out_filename.c_str()); | |
- if ((len = getline(&line, &buflen, stdin)) == -1) { | |
- printf("\nError: failed to read answer\n\n"); | |
- std::fflush(stdout); | |
- usage(EXIT_FAILURE); | |
- } | |
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n')); | |
- | |
- if (line[0] == 'n') { | |
- free(line); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- free(line); | |
- } | |
- | |
- // Run the algorithm. | |
- if (int_size == 4) compute_plcp_bitvector<std::uint32_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
- else if (int_size == 5) compute_plcp_bitvector<uint40>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
- else if (int_size == 6) compute_plcp_bitvector<uint48>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
- else compute_plcp_bitvector<std::uint64_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use); | |
-} | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint40.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/uint40.hpp | |
deleted file mode 100644 | |
index a2cbe22c..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint40.hpp | |
+++ /dev/null | |
@@ -1,78 +0,0 @@ | |
-/** | |
- * @file uint40.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __UINT40_HPP_INCLUDED | |
-#define __UINT40_HPP_INCLUDED | |
- | |
-#include <cstdint> | |
-#include <limits> | |
- | |
- | |
-class uint40 { | |
- private: | |
- std::uint32_t low; | |
- std::uint8_t high; | |
- | |
- public: | |
- uint40() {} | |
- uint40(std::uint32_t l, std::uint8_t h) : low(l), high(h) {} | |
- uint40(const uint40& a) : low(a.low), high(a.high) {} | |
- uint40(const std::int32_t& a) : low(a), high(0) {} | |
- uint40(const std::uint32_t& a) : low(a), high(0) {} | |
- uint40(const std::uint64_t& a) : low(a & 0xFFFFFFFF), high((a >> 32) & 0xFF) {} | |
- uint40(const std::int64_t& a) : low(a & 0xFFFFFFFFL), high((a >> 32) & 0xFF) {} | |
- | |
- inline operator uint64_t() const { return (((std::uint64_t)high) << 32) | (std::uint64_t)low; } | |
- inline bool operator == (const uint40& b) const { return (low == b.low) && (high == b.high); } | |
- inline bool operator != (const uint40& b) const { return (low != b.low) || (high != b.high); } | |
-} __attribute__((packed)); | |
- | |
-namespace std { | |
- | |
-template<> | |
-class numeric_limits<uint40> { | |
- public: | |
- static uint40 min() { | |
- return uint40(std::numeric_limits<std::uint32_t>::min(), | |
- std::numeric_limits<std::uint8_t>::min()); | |
- } | |
- | |
- static uint40 max() { | |
- return uint40(std::numeric_limits<std::uint32_t>::max(), | |
- std::numeric_limits<std::uint8_t>::max()); | |
- } | |
-}; | |
- | |
-} // namespace std | |
- | |
-#endif // __UINT40_HPP_INCLUDED | |
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint48.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/uint48.hpp | |
deleted file mode 100644 | |
index a6f6ab31..00000000 | |
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint48.hpp | |
+++ /dev/null | |
@@ -1,78 +0,0 @@ | |
-/** | |
- * @file uint48.hpp | |
- * @section LICENCE | |
- * | |
- * This file is part of EM-SuccinctIrreducible v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2016 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __UINT48_HPP_INCLUDED | |
-#define __UINT48_HPP_INCLUDED | |
- | |
-#include <cstdint> | |
-#include <limits> | |
- | |
- | |
-class uint48 { | |
- private: | |
- std::uint32_t low; | |
- std::uint16_t high; | |
- | |
- public: | |
- uint48() {} | |
- uint48(std::uint32_t l, std::uint16_t h) : low(l), high(h) {} | |
- uint48(const uint48& a) : low(a.low), high(a.high) {} | |
- uint48(const std::int32_t& a) : low(a), high(0) {} | |
- uint48(const std::uint32_t& a) : low(a), high(0) {} | |
- uint48(const std::uint64_t& a) : low(a & 0xFFFFFFFF), high((a >> 32) & 0xFFFF) {} | |
- uint48(const std::int64_t& a) : low(a & 0xFFFFFFFFL), high((a >> 32) & 0xFFFF) {} | |
- | |
- inline operator uint64_t() const { return (((std::uint64_t)high) << 32) | (std::uint64_t)low; } | |
- inline bool operator == (const uint48& b) const { return (low == b.low) && (high == b.high); } | |
- inline bool operator != (const uint48& b) const { return (low != b.low) || (high != b.high); } | |
-} __attribute__((packed)); | |
- | |
-namespace std { | |
- | |
-template<> | |
-class numeric_limits<uint48> { | |
- public: | |
- static uint48 min() { | |
- return uint48(std::numeric_limits<std::uint32_t>::min(), | |
- std::numeric_limits<std::uint16_t>::min()); | |
- } | |
- | |
- static uint48 max() { | |
- return uint48(std::numeric_limits<std::uint32_t>::max(), | |
- std::numeric_limits<std::uint16_t>::max()); | |
- } | |
-}; | |
- | |
-} // namespace std | |
- | |
-#endif // __UINT48_HPP_INCLUDED | |
diff --git a/exttools/do.sh b/exttools/do.sh | |
deleted file mode 100755 | |
index 02e1fe90..00000000 | |
--- a/exttools/do.sh | |
+++ /dev/null | |
@@ -1,12 +0,0 @@ | |
-#!/bin/sh | |
- | |
-# $1 = text input | |
-# add the zero byte to the text input, save as $1.0 | |
-stxxltools/build/standardize "${1}" | |
-# compute SA, store as $1.0.sa5 | |
-/bighome/workspace/pSAscan-0.1.0/src/psascan "${1}.0" | |
-# compute ISA and BWT | |
-/bighome/workspace/stxxltools/build/isaandbwt "${1}.0" | |
-# compute PLCP | |
-/bighome/workspace/EM-SuccinctIrreducible-0.1.0/src/construct_plcp_sequential "${1}.0" | |
- | |
diff --git a/exttools/pSAscan-0.1.0/AUTHORS b/exttools/pSAscan-0.1.0/AUTHORS | |
deleted file mode 100644 | |
index af53cfd7..00000000 | |
--- a/exttools/pSAscan-0.1.0/AUTHORS | |
+++ /dev/null | |
@@ -1,2 +0,0 @@ | |
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
-Dominik Kempa (contact person) <dominik.kempa (at) gmail.com> | |
diff --git a/exttools/pSAscan-0.1.0/LICENCE b/exttools/pSAscan-0.1.0/LICENCE | |
deleted file mode 100644 | |
index 10333c04..00000000 | |
--- a/exttools/pSAscan-0.1.0/LICENCE | |
+++ /dev/null | |
@@ -1,24 +0,0 @@ | |
-Copyright (C) 2014-2015 | |
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
-Dominik Kempa <dominik.kempa (at) gmail.com> | |
- | |
-Permission is hereby granted, free of charge, to any person | |
-obtaining a copy of this software and associated documentation | |
-files (the "Software"), to deal in the Software without | |
-restriction, including without limitation the rights to use, | |
-copy, modify, merge, publish, distribute, sublicense, and/or sell | |
-copies of the Software, and to permit persons to whom the | |
-Software is furnished to do so, subject to the following | |
-conditions: | |
- | |
-The above copyright notice and this permission notice shall be | |
-included in all copies or substantial portions of the Software. | |
- | |
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
-OTHER DEALINGS IN THE SOFTWARE. | |
diff --git a/exttools/pSAscan-0.1.0/README b/exttools/pSAscan-0.1.0/README | |
deleted file mode 100644 | |
index b1c6509f..00000000 | |
--- a/exttools/pSAscan-0.1.0/README | |
+++ /dev/null | |
@@ -1,206 +0,0 @@ | |
-pSAscan - parallel external memory suffix array construction algorithm. | |
-======================================================================= | |
- | |
- | |
-Description | |
------------ | |
- | |
-This package contains implementation of the parallel external-memory | |
-suffix array construction algorithm called pSAscan described in the paper | |
- | |
- Juha Karkkainen, Dominik Kempa, and Simon J. Puglisi, | |
- Parallel External Memory Suffix Sorting. | |
- In Proc. 26th Annual Symposium on Combinatorial Pattern Matching (CPM) 2015. | |
- | |
-The algorithm is based on the sequential external-memory suffix array | |
-construction algorithm called SAscan described in | |
- | |
- Juha Karkkainen and Dominik Kempa, | |
- Engineering a Lightweight External Memory Suffix Array Construction Algorithm. | |
- In Proc. 2nd International Conference on Algorithms for Big Data (ICABD) 2014. | |
- | |
-The latest version of SAscan/pSAscan is available at: | |
- http://www.cs.helsinki.fi/group/pads/ | |
- | |
- | |
- | |
-Compilation and usage | |
---------------------- | |
- | |
-1. Download http://libdivsufsort.googlecode.com/files/libdivsufsort-2.0.1.tar.gz | |
- and install. Make sure to compile libdivsufsort to static 64-bit libraries, | |
- i.e. set options in the main CMakeLists.txt to | |
- | |
- option(BUILD_SHARED_LIBS "Set to OFF to build static libraries" OFF) | |
- option(BUILD_DIVSUFSORT64 "Build libdivsufsort64" ON) | |
- | |
-2. Compile pSAscan using the provided Makefile | |
- | |
- $ cd src | |
- $ make | |
- | |
-This will produce the executable 'psascan' that allows computing the suffix | |
-array of a given file. For usage, run the 'psascan' program without any | |
-arguments. | |
- | |
-Example | |
-~~~~~~~ | |
- | |
-To compute the suffix array of a file in.txt located in /data01/ using 8GiB | |
-of RAM run the 'psascan' command (assuming you are in the src/ directory) as: | |
- | |
- $ ./psascan /data01/in.txt -m 8192 | |
- | |
-By default, the resulting suffix array is written to a file matching the | |
-filename of the input text with the .sa5 extension (/data01/in.txt.sa5 | |
-in this case). To write the suffix array to a different file, use the | |
--o flag, e.g., | |
- | |
- $ ./psascan /data01/in.txt -m 8192 -o /data02/in.txt.suf | |
- | |
-The current implementation encodes the output suffix array using unsigned | |
-40-bit integers. For further processing of the suffix array, one should use | |
-the same or compatible encoding. The class implementing the unsigned 40-bit | |
-integers is located in the src/psascan_src/uint40.h file. | |
- | |
- | |
- | |
-Disk space requirements | |
------------------------ | |
- | |
-To compute the suffix array of an n-byte input text, pSAscan needs about | |
-7.5n bytes of disk space. This includes the input (n bytes) and output | |
-(5n bytes). | |
- | |
-In the default mode, the 'psascan' program assumes, that there is 6.5n bytes | |
-of free disk space available in the location used as the destination for the | |
-suffix array. This space is used for auxiliary files created during the | |
-computation and to accommodate the output. | |
- | |
-The above disk space requirement may in some cases prohibit the use of | |
-algorithm, e.g., if there is enough space (5n) on one physical disk to hold | |
-the suffix array, but not enough (6.5n) to run the algorithm. To still | |
-allow the computation in such cases, the 'psascan' program implements the | |
--g flag. With this flag, one can force pSAscan to use disk space from two | |
-physically different locations (e.g., on two disks). | |
- | |
-More precisely, out of 6.5n bytes of disk space used by pSAscan, about n | |
-bytes is used to store the so-called "gap array". By default, the gap array | |
-is stored along with the suffix array. The -g flag allows explicitly | |
-specifying the location of the gap array. This way, it suffices that there | |
-is only 5.5n bytes of disk space in the location specified as the destination | |
-of the suffix array. The remaining n bytes can be allocated in other location | |
-specified with the -g flag. | |
- | |
-Example | |
-~~~~~~~ | |
- | |
-Assume the location of input/output files and RAM usage as in the example | |
-from the previous section. To additionally specify the location of the gap | |
-array as /data03/in.txt.gap run the 'psascan' command as: | |
- | |
- $ ./psascan /data01/in.txt -m 8192 -o /data02/in.txt.suf -g /data03/in.txt.gap | |
- | |
- | |
- | |
-RAM requirements | |
----------------- | |
- | |
-The algorithm does not have a fixed memory requirements. In principle, it | |
-can run with any amount of RAM (though there is some minimal per-thread | |
-amount necessary in the streaming phase). However, since the time complexity | |
-(without logarithmic factors) of the algorithm is O(n^2 / M), where M is the | |
-amount of RAM used in the computation, using more RAM decreases the runtime. | |
-Thus, the best performance is achieved when nearly all unused RAM available | |
-in the system (as shown by the Linux 'free' command) is used for the | |
-computation. Leaving about 5% (but not more than 2GiB) of RAM free is | |
-advised to prevent thrashing. | |
- | |
-Example | |
-~~~~~~~ | |
- | |
-On a machine with 12 physical cores and Hyper-Threading (and thus capable | |
-of simultaneously running 24 threads) it takes about a week to compute a | |
-suffix array of a 200GiB file using 3.5GiB of RAM. Using 120GiB of RAM | |
-reduces the time to less than 12 hours. | |
- | |
- | |
- | |
-Troubleshooting | |
---------------- | |
- | |
-1. I am getting "Error: the limit on the maximum number of open files | |
- is too small (...)". | |
- | |
-Solution: The error is caused by the operating system imposing a limit | |
-on the maximum number of files opened by a program. The limit (in Linux | |
-referred to as the soft limit) can be increased with the "ulimit -n newlimit" | |
-command. However, in Linux the soft limit cannot be increased beyond the | |
-so-called "hard limit", which is usually only few times larger than the | |
-soft limit. Furthermore, this is a temporary solution that needs to repeated | |
-every time a new session is started. To increase the limits permanently, | |
-edit (as a root) the file /etc/security/limits.conf and add the following | |
-lines at the end (including the asterisks): | |
- | |
-* soft nofile 128000 | |
-* hard nofile 128000 | |
- | |
-This increases the limit to 128000 (use larger values if necessary). The | |
-new limits apply (check with ulimit -n) after starting a new session. | |
- | |
-2. Program stops without any error message. | |
- | |
-Solution: Most likely the problem occurred during internal-memory sorting. | |
-Re-running the program with -v flag should show the error message. | |
- | |
- | |
- | |
-Limitations / known issues | |
--------------------------- | |
- | |
-1. The maximum size of input text is 1TiB (2^40 bytes). | |
-2. The current implementation supports only inputs over byte alphabet. | |
-3. Only texts not containing bytes with value 255 are handled correctly. | |
- The 255-bytes can be removed from the input text using the tool located | |
- in the directory tools/delete-bytes-255/ of this package. | |
-4. The current internal-memory suffix sorting algorithm used internally | |
- in pSAscan works only if the input text is split into segments of | |
- size at most 2GiB each. Therefore, pSAscan will fail, if the memory | |
- budget X for the computation (specified with the -m flag) satisfies | |
- X / p > 10 * 2^31, where p is the number of threads used during | |
- the computation. On most systems, this is not a severe limitation, | |
- e.g., for a regular 4-core machine supporting Hyper-Threading (and | |
- thus capable of simultaneously running 8 threads), pSAscan can utilize | |
- up to 160GiB of RAM. | |
- | |
-The above limitations (except possibly 2) are not inherent to the algorithm | |
-but rather the current implementation. Future releases will most likely | |
-overcome these limitations. | |
- | |
- | |
- | |
-Third-party code | |
----------------- | |
- | |
-The pSAscan implementation makes use of some third-party code, in particular: | |
- - the uint40 class was copied (and slightly modified) from the eSAIS-0.5.2 | |
- algorithm (https://panthema.net/2012/1119-eSAIS-Inducing-Suffix-and- | |
- LCP-Arrays-in-External-Memory/) | |
- - pSAscan uses the libdivsufsort-2.0.1 algorithm as the internal | |
- suffix-sorting routine (https://code.google.com/p/libdivsufsort/) | |
- | |
- | |
- | |
-Terms of use | |
------------- | |
- | |
-pSAscan is released under the MIT/X11 license. See the file LICENCE for | |
-more details. | |
- | |
-If you use this code, please cite the paper mentioned above and publish | |
-the URL from which you downloaded the code. | |
- | |
- | |
- | |
-Helsinki, June 2015. | |
-Written by Dominik Kempa <dominik.kempa (at) gmail.com> | |
diff --git a/exttools/pSAscan-0.1.0/VERSION b/exttools/pSAscan-0.1.0/VERSION | |
deleted file mode 100644 | |
index 6e8bf73a..00000000 | |
--- a/exttools/pSAscan-0.1.0/VERSION | |
+++ /dev/null | |
@@ -1 +0,0 @@ | |
-0.1.0 | |
diff --git a/exttools/pSAscan-0.1.0/src/Makefile b/exttools/pSAscan-0.1.0/src/Makefile | |
deleted file mode 100644 | |
index b53e1d35..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/Makefile | |
+++ /dev/null | |
@@ -1,13 +0,0 @@ | |
-LIB_DIR = /home/niki/opt/lib | |
-INC_DIR = /home/niki/opt/include | |
-SHELL = /bin/sh | |
-CC = g++ | |
-CFLAGS = -Wall -Wextra -pedantic -Wshadow -funroll-loops -pthread -std=c++0x -DNDEBUG -O3 -march=native -I$(INC_DIR) -L$(LIB_DIR) | |
- | |
-all: psascan | |
-psascan: main.cpp | |
- $(CC) $(CFLAGS) -o psascan ./psascan_src/utils.cpp main.cpp -ldivsufsort -ldivsufsort64 -fopenmp | |
-clean: | |
- /bin/rm -f *.o | |
-nuclear: | |
- /bin/rm -f psascan *.o | |
diff --git a/exttools/pSAscan-0.1.0/src/main.cpp b/exttools/pSAscan-0.1.0/src/main.cpp | |
deleted file mode 100644 | |
index 8a70e1a2..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/main.cpp | |
+++ /dev/null | |
@@ -1,174 +0,0 @@ | |
-/** | |
- * @file src/main.cpp | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <ctime> | |
-#include <string> | |
-#include <getopt.h> | |
-#include <unistd.h> | |
-#include <omp.h> | |
- | |
-#include "psascan_src/psascan.h" | |
- | |
- | |
-char *program_name; | |
- | |
-void usage(int status) { | |
- printf( | |
-"Usage: %s [OPTION]... FILE\n" | |
-"Construct the suffix array for text stored in FILE.\n" | |
-"\n" | |
-"Mandatory arguments to long options are mandatory for short options too.\n" | |
-" -g, --gap=GAPFILE specify the file holding the gap array (default:\n" | |
-" FILE.sa5.gap)\n" | |
-" -h, --help display this help and exit\n" | |
-" -m, --mem=LIMIT limit RAM usage to LIMIT MiB (default: 3072)\n" | |
-" -o, --output=OUTFILE specify the output file (default: FILE.sa5)\n" | |
-" -v, --verbose print detailed information during internal sufsort\n", | |
- program_name); | |
- | |
- std::exit(status); | |
-} | |
- | |
-bool file_exists(std::string fname) { | |
- std::FILE *f = std::fopen(fname.c_str(), "r"); | |
- bool ret = (f != NULL); | |
- if (f != NULL) std::fclose(f); | |
- | |
- return ret; | |
-} | |
- | |
-int main(int argc, char **argv) { | |
- srand(time(0) + getpid()); | |
- program_name = argv[0]; | |
- bool verbose = false; | |
- | |
- static struct option long_options[] = { | |
- {"help", no_argument, NULL, 'h'}, | |
- {"verbose", no_argument, NULL, 'v'}, | |
- {"mem", required_argument, NULL, 'm'}, | |
- {"output", required_argument, NULL, 'o'}, | |
- {"gap", required_argument, NULL, 'g'}, | |
- {NULL, 0, NULL, 0} | |
- }; | |
- | |
- long ram_use = 3072L << 20; | |
- std::string out_fname(""); | |
- std::string gap_fname(""); | |
- | |
- // Parse command-line options. | |
- int c; | |
- while ((c = getopt_long(argc, argv, "hvm:o:g:", long_options, NULL)) != -1) { | |
- switch(c) { | |
- case 'm': | |
- ram_use = std::atol(optarg) << 20; | |
- if (ram_use <= 0L) { | |
- fprintf(stderr, "Error: invalid RAM limit (%ld)\n\n", ram_use); | |
- usage(EXIT_FAILURE); | |
- } | |
- break; | |
- case 'o': | |
- out_fname = std::string(optarg); | |
- break; | |
- case 'g': | |
- gap_fname = std::string(optarg); | |
- break; | |
- case 'v': | |
- verbose = true; | |
- break; | |
- case 'h': | |
- usage(EXIT_FAILURE); | |
- default: | |
- usage(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- if (optind >= argc) { | |
- fprintf(stderr, "Error: FILE not provided\n\n"); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- // Parse the text filename. | |
- std::string text_fname = std::string(argv[optind++]); | |
- if (optind < argc) { | |
- fprintf(stderr, "Warning: multiple input files provided. " | |
- "Only the first will be processed.\n"); | |
- } | |
- | |
- // Set default output filename (if not provided). | |
- if (out_fname.empty()) | |
- out_fname = text_fname + ".sa5"; | |
- | |
- // Set default gap filename (if not provided). | |
- if (gap_fname.empty()) | |
- gap_fname = out_fname; | |
- | |
- // Check if input exists. | |
- if (!file_exists(text_fname)) { | |
- fprintf(stderr, "Error: input file (%s) does not exist\n\n", | |
- text_fname.c_str()); | |
- usage(EXIT_FAILURE); | |
- } | |
- | |
- if (file_exists(out_fname)) { | |
- // Output file exists, should we proceed? | |
- char *line = NULL; | |
- size_t buflen = 0; | |
- long len = 0L; | |
- | |
- do { | |
- printf("Output file (%s) exists. Overwrite? [y/n]: ", | |
- out_fname.c_str()); | |
- if ((len = getline(&line, &buflen, stdin)) == -1) { | |
- fprintf(stderr, "\nError: failed to read answer\n\n"); | |
- usage(EXIT_FAILURE); | |
- } | |
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n')); | |
- | |
- if (line[0] == 'n') { | |
- free(line); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- free(line); | |
- } | |
- | |
- // Find the number of (logical) cores on the machine. | |
- long max_threads = (long)omp_get_max_threads(); | |
- | |
- // Run pSAscan. | |
- pSAscan(text_fname, out_fname, gap_fname, | |
- ram_use, max_threads, verbose); | |
-} | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/approx_rank.h b/exttools/pSAscan-0.1.0/src/psascan_src/approx_rank.h | |
deleted file mode 100644 | |
index 0a3fd2f0..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/approx_rank.h | |
+++ /dev/null | |
@@ -1,187 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/approx_rank.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * The approximate rank data structure. Based on the 'sparse-LF' | |
- * data structure described in: | |
- * | |
- * Dominik Kempa, Simon J. Puglisi: | |
- * Lempel-Ziv Factorization: Simple, Fast, Practical. | |
- * In Proc. ALENEX 2013, p. 103-112. | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_APPROX_RANK_H_INCLUDED | |
-#define __PSASCAN_SRC_APPROX_RANK_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <algorithm> | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template<long k_sampling_rate_log> | |
-class approx_rank { | |
- private: | |
- long *m_list_size; | |
- long **m_list; | |
- | |
- static const long k_sampling_rate; | |
- static const long k_sampling_rate_mask; | |
- | |
- public: | |
- long *m_count; | |
- | |
- private: | |
- static void compute_symbol_count_aux(const unsigned char *text, long beg, | |
- long end, long *symbol_count) { | |
- for (long j = beg; j < end; ++j) | |
- ++symbol_count[text[j]]; | |
- } | |
- | |
- static void compute_occ_list_aux(const unsigned char *text, long beg, | |
- long end, long *symbol_count, long **list) { | |
- // Compute where to start writing positions for each symbol. | |
- long *ptr = new long[256]; | |
- for (long c = 0; c < 256; ++c) | |
- ptr[c] = (symbol_count[c] + k_sampling_rate - 1) / k_sampling_rate; | |
- | |
- // Add occurrences in the block to the lists. | |
- for (long j = beg; j < end; ++j) { | |
- unsigned char c = text[j]; | |
- if (!((symbol_count[c]++) & k_sampling_rate_mask)) | |
- list[c][ptr[c]++] = j; | |
- } | |
- | |
- // Clean up. | |
- delete[] ptr; | |
- } | |
- | |
- public: | |
- approx_rank(const unsigned char *text, long length, long max_threads) { | |
- // Compute symbol counts in each block. | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_threads = (length + max_block_size - 1) / max_block_size; | |
- long **symbol_count = new long*[n_threads]; | |
- for (long j = 0; j < n_threads; ++j) { | |
- symbol_count[j] = new long[256]; | |
- std::fill(symbol_count[j], symbol_count[j] + 256, 0L); | |
- } | |
- | |
- std::thread **threads = new std::thread*[n_threads]; | |
- for (long t = 0; t < n_threads; ++t) { | |
- long block_beg = t * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- | |
- threads[t] = new std::thread(compute_symbol_count_aux, | |
- text, block_beg, block_end, symbol_count[t]); | |
- } | |
- | |
- for (long t = 0; t < n_threads; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_threads; ++t) delete threads[t]; | |
- | |
- // Compute (exclusive) partial sums over symbol counts. | |
- m_count = new long[256]; | |
- std::fill(m_count, m_count + 256, 0L); | |
- long *temp_count = new long[256]; | |
- for (long i = 0; i < n_threads; ++i) { | |
- std::copy(symbol_count[i], symbol_count[i] + 256, temp_count); | |
- std::copy(m_count, m_count + 256, symbol_count[i]); | |
- for (long j = 0; j < 256; ++j) | |
- m_count[j] += temp_count[j]; | |
- } | |
- delete[] temp_count; | |
- | |
- // Compute sizes and allocate occurrences lists. | |
- m_list_size = new long[256]; | |
- m_list = new long*[256]; | |
- for (long i = 0; i < 256; ++i) { | |
- m_list_size[i] = (m_count[i] + k_sampling_rate - 1) / k_sampling_rate; | |
- if (m_list_size[i]) m_list[i] = new long[m_list_size[i]]; | |
- else m_list[i] = NULL; | |
- } | |
- | |
- for (long t = 0; t < n_threads; ++t) { | |
- long block_beg = t * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- | |
- threads[t] = new std::thread(compute_occ_list_aux, text, | |
- block_beg, block_end, symbol_count[t], m_list); | |
- } | |
- | |
- for (long t = 0; t < n_threads; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_threads; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- | |
- // Clean up. | |
- for (long j = 0; j < n_threads; ++j) | |
- delete[] symbol_count[j]; | |
- delete[] symbol_count; | |
- } | |
- | |
- inline long rank(long i, unsigned char c) const { | |
- if (i <= 0 || (!m_list_size[c]) || m_list[c][0] >= i) | |
- return 0L; | |
- | |
- long left = 0, right = m_list_size[c]; | |
- while (left + 1 != right) { | |
- // Invariant: the answer is in range [left..right). | |
- long mid = (left + right) / 2; | |
- if (m_list[c][mid] <= i) left = mid; | |
- else right = mid; | |
- } | |
- return (left << k_sampling_rate_log); | |
- } | |
- | |
- ~approx_rank() { | |
- delete[] m_count; | |
- delete[] m_list_size; | |
- for (long j = 0; j < 256; ++j) { | |
- if (m_list[j]) | |
- delete[] m_list[j]; | |
- } | |
- delete[] m_list; | |
- } | |
-}; | |
- | |
-template<long k_sampling_rate_log> | |
-const long approx_rank<k_sampling_rate_log>::k_sampling_rate = (1L << k_sampling_rate_log); | |
- | |
-template<long k_sampling_rate_log> | |
-const long approx_rank<k_sampling_rate_log>::k_sampling_rate_mask = (1L << k_sampling_rate_log) - 1; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_APPROX_RANK_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_backward_skip_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_backward_skip_stream_reader.h | |
deleted file mode 100644 | |
index be3dadc0..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_backward_skip_stream_reader.h | |
+++ /dev/null | |
@@ -1,182 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/async_backward_skip_stream_reader.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_ASYNC_BACKWARD_SKIP_STREAM_READER_H_INCLUDED | |
-#define __PSASCAN_SRC_ASYNC_BACKWARD_SKIP_STREAM_READER_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template<typename value_type> | |
-struct async_backward_skip_stream_reader { | |
- template<typename T> | |
- static void io_thread_code(async_backward_skip_stream_reader<T> *reader) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(reader->m_mutex); | |
- while (!(reader->m_avail) && !(reader->m_finished)) | |
- reader->m_cv.wait(lk); | |
- | |
- if (!(reader->m_avail) && (reader->m_finished)) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- // Safely read the data from disk. | |
- long filepos = std::ftell(reader->m_file) / sizeof(T); | |
- long toread = std::min(reader->m_buf_size, filepos - reader->m_active_buf_filled); | |
- if (toread > 0) { | |
- std::fseek(reader->m_file, -((reader->m_active_buf_filled + toread) * sizeof(T)), SEEK_CUR); | |
- reader->m_passive_buf_filled = std::fread(reader->m_passive_buf, sizeof(T), toread, reader->m_file); | |
- } | |
- | |
- // Let the caller know that the I/O thread finished reading. | |
- lk.lock(); | |
- reader->m_avail = false; | |
- lk.unlock(); | |
- reader->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- async_backward_skip_stream_reader(std::string filename, long skip_elems, long bufsize = (4 << 20)) { | |
- m_file = utils::open_file(filename.c_str(), "r"); | |
- std::fseek(m_file, -(skip_elems * sizeof(value_type)), SEEK_END); | |
- | |
- | |
- // Initialize buffers. | |
- long elems = std::max(2UL, (bufsize + sizeof(value_type) - 1) / sizeof(value_type)); | |
- m_buf_size = elems / 2; | |
- | |
- m_active_buf_filled = 0L; | |
- m_passive_buf_filled = 0L; | |
- m_active_buf_pos = -1L; | |
- m_active_buf = (value_type *)malloc(m_buf_size * sizeof(value_type)); | |
- m_passive_buf = (value_type *)malloc(m_buf_size * sizeof(value_type)); | |
- | |
- m_finished = false; | |
- | |
- // Start the I/O thread and immediately start reading. | |
- m_avail = true; | |
- m_thread = new std::thread(io_thread_code<value_type>, this); | |
- } | |
- | |
- ~async_backward_skip_stream_reader() { | |
- // Let the I/O thread know that we're done. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_finished = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- m_thread->join(); | |
- | |
- // Clean up. | |
- delete m_thread; | |
- free(m_active_buf); | |
- free(m_passive_buf); | |
- std::fclose(m_file); | |
- } | |
- | |
- // This function checks if the reading thread has already | |
- // prefetched the next buffer (the request should have been | |
- // issued before), and waits in case the prefetching was not | |
- // completed yet. | |
- void receive_new_buffer() { | |
- // Wait until the I/O thread finishes reading the previous | |
- // buffer. In most cases this step is instantaneous. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_avail == true) | |
- m_cv.wait(lk); | |
- | |
- // Set the new active buffer. | |
- std::swap(m_active_buf, m_passive_buf); | |
- m_active_buf_filled = m_passive_buf_filled; | |
- m_active_buf_pos = m_active_buf_filled - 1L; | |
- | |
- // Let the I/O thread know that it can now prefetch | |
- // another buffer. | |
- m_avail = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- } | |
- | |
- inline value_type read() { | |
- if (m_active_buf_pos < 0L) { | |
- // The active buffer run out of data. | |
- // At this point we need to swap it with the passive | |
- // buffer. The request to read that passive buffer should | |
- // have been scheduled long time ago, so hopefully the | |
- // buffer is now available. We check for that, but we | |
- // also might wait, if the reading has not yet been | |
- // finished. At this point we also already schedule | |
- // the next read. | |
- receive_new_buffer(); | |
- } | |
- | |
- return m_active_buf[m_active_buf_pos--]; | |
- } | |
- | |
-private: | |
- value_type *m_active_buf; | |
- value_type *m_passive_buf; | |
- | |
- long m_buf_size; | |
- long m_active_buf_pos; | |
- long m_active_buf_filled; | |
- long m_passive_buf_filled; | |
- | |
- // Used for synchronization with the I/O thread. | |
- std::mutex m_mutex; | |
- std::condition_variable m_cv; | |
- bool m_avail; | |
- bool m_finished; | |
- | |
- std::FILE *m_file; | |
- std::thread *m_thread; | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_ASYNC_BACKWARD_SKIP_STREAM_READER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_bit_stream_writer.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_bit_stream_writer.h | |
deleted file mode 100644 | |
index 6627ad00..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_bit_stream_writer.h | |
+++ /dev/null | |
@@ -1,180 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/async_bit_stream_writer.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_ASYNC_BIT_STREAM_WRITER_H_INCLUDED | |
-#define __PSASCAN_SRC_ASYNC_BIT_STREAM_WRITER_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct async_bit_stream_writer { | |
- static void io_thread_code(async_bit_stream_writer *writer) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(writer->m_mutex); | |
- while (!(writer->m_avail) && !(writer->m_finished)) | |
- writer->m_cv.wait(lk); | |
- | |
- if (!(writer->m_avail) && (writer->m_finished)) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- // Safely write the data to disk. | |
- utils::add_objects_to_file(writer->m_passive_buf, | |
- writer->m_passive_buf_filled, writer->m_file); | |
- | |
- // Let the caller know that the I/O thread finished writing. | |
- lk.lock(); | |
- writer->m_avail = false; | |
- lk.unlock(); | |
- writer->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- async_bit_stream_writer(std::string filename, long bufsize = (4 << 20)) { | |
- m_file = utils::open_file(filename.c_str(), "w"); | |
- | |
- // Initialize buffers. | |
- long elems = std::max(2L, bufsize); | |
- m_buf_size = elems / 2; // both buffers are of the same size | |
- | |
- m_active_buf = (unsigned char *)malloc(m_buf_size); | |
- m_passive_buf = (unsigned char *)malloc(m_buf_size); | |
- | |
- m_active_buf[0] = 0; | |
- m_bit_pos = 0L; | |
- m_active_buf_filled = 0L; | |
- m_passive_buf_filled = 0L; | |
- | |
- m_avail = false; | |
- m_finished = false; | |
- | |
- // Start the I/O thread. | |
- m_thread = new std::thread(io_thread_code, this); | |
- } | |
- | |
- ~async_bit_stream_writer() { | |
- // Write the partially filled active buffer to disk. | |
- if (m_bit_pos != 0) ++m_active_buf_filled; | |
- if (m_active_buf_filled > 0L) | |
- send_active_buf_to_write(); | |
- | |
- // Let the I/O thread know that we're done. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_finished = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- m_thread->join(); | |
- | |
- // Clean up. | |
- delete m_thread; | |
- free(m_active_buf); | |
- free(m_passive_buf); | |
- std::fclose(m_file); | |
- } | |
- | |
- // Passes on the active buffer (full, unless it's the last one, | |
- // partially filled, buffer passed from destructor) to the I/O thread. | |
- void send_active_buf_to_write() { | |
- // Wait until the I/O thread finishes writing the previous buffer. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_avail == true) | |
- m_cv.wait(lk); | |
- | |
- // Set the new passive buffer. | |
- std::swap(m_active_buf, m_passive_buf); | |
- m_passive_buf_filled = m_active_buf_filled; | |
- m_active_buf_filled = 0L; | |
- m_bit_pos = 0L; | |
- m_active_buf[0] = 0; | |
- | |
- // Let the I/O thread know that the buffer is waiting. | |
- m_avail = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- } | |
- | |
- inline void write(unsigned char bit) { | |
- m_active_buf[m_active_buf_filled] |= (bit << m_bit_pos); | |
- ++m_bit_pos; | |
- if (m_bit_pos == 8) { | |
- m_bit_pos = 0; | |
- ++m_active_buf_filled; | |
- | |
- // If the active buffer was full, send it to I/O thread. | |
- // This function may wait a bit until the I/O thread | |
- // finishes writing the previous passive buffer. | |
- if (m_active_buf_filled == m_buf_size) | |
- send_active_buf_to_write(); | |
- | |
- // Clear all bits in the current byte. | |
- m_active_buf[m_active_buf_filled] = 0; | |
- } | |
- } | |
- | |
-private: | |
- unsigned char *m_active_buf; | |
- unsigned char *m_passive_buf; | |
- | |
- long m_buf_size; // size of each of the buffers | |
- long m_bit_pos; | |
- long m_active_buf_filled; | |
- long m_passive_buf_filled; | |
- | |
- // Used for synchronization with the I/O thread. | |
- bool m_avail; // signals availability of buffer for I/O thread | |
- bool m_finished; // signals the end of writing | |
- std::mutex m_mutex; | |
- std::condition_variable m_cv; | |
- | |
- std::FILE *m_file; | |
- std::thread *m_thread; | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_multifile_bit_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_multifile_bit_stream_reader.h | |
deleted file mode 100644 | |
index f9ee8cec..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_multifile_bit_stream_reader.h | |
+++ /dev/null | |
@@ -1,236 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/async_multifile_bit_stream_reader.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_ASYNC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED | |
-#define __PSASCAN_SRC_ASYNC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <mutex> | |
-#include <vector> | |
-#include <algorithm> | |
-#include <condition_variable> | |
- | |
-#include "utils.h" | |
-#include "multifile.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct async_multifile_bit_stream_reader { | |
- async_multifile_bit_stream_reader(const multifile *m, long start_pos = 0L, | |
- long bufsize = (4L << 20)) { | |
- m_files_info = m->files_info; | |
- | |
- long items = std::max(2L, bufsize); | |
- m_buf_size = items / 2L; | |
- | |
- // Reset counters. | |
- m_active_buf_filled = 0; | |
- m_passive_buf_filled = 0; | |
- m_active_buf_pos = 0; | |
- | |
- // Initialize buffers. | |
- m_active_buf = (unsigned char *)malloc(m_buf_size); | |
- m_passive_buf = (unsigned char *)malloc(m_buf_size); | |
- | |
- // Initialize the reading. | |
- init(start_pos); | |
- } | |
- | |
- void init(long start_pos) { | |
- m_total_read_buf = start_pos; | |
- | |
- m_file = NULL; | |
- for (size_t j = 0; j < m_files_info.size(); ++j) { | |
- if (m_files_info[j].m_beg <= start_pos && start_pos < m_files_info[j].m_end) { | |
- m_file_id = j; | |
- m_file = utils::open_file(m_files_info[j].m_filename, "r"); | |
- break; | |
- } | |
- } | |
- | |
- if (m_file != NULL) { | |
- long offset = start_pos - m_files_info[m_file_id].m_beg; | |
- std::fseek(m_file, offset >> 3, SEEK_SET); | |
- | |
- m_cur_byte = 0; | |
- m_cur_bit = (offset & 7L); | |
- m_active_buf_pos = m_cur_bit; | |
- m_total_read_buf -= m_cur_bit; | |
- | |
- long file_left = m_files_info[m_file_id].m_end - m_total_read_buf; | |
- m_active_buf_filled = std::min(file_left, 8L * m_buf_size); | |
- long toread_bytes = (m_active_buf_filled + 7L) / 8L; | |
- utils::read_n_objects_from_file(m_active_buf, toread_bytes, m_file); | |
- m_total_read_buf += m_active_buf_filled; | |
- if (m_total_read_buf == m_files_info[m_file_id].m_end) { | |
- std::fclose(m_file); | |
- m_file = NULL; | |
- } | |
- } | |
- | |
- m_avail = true; | |
- m_finished = false; | |
- m_thread = new std::thread(async_io_code, this); | |
- } | |
- | |
- inline bool read() { | |
- if (m_active_buf_pos == m_active_buf_filled) | |
- receive_new_buffer(); | |
- | |
- bool result = (m_active_buf[m_cur_byte] & (1 << m_cur_bit)); | |
- ++m_cur_bit; | |
- ++m_active_buf_pos; | |
- if (m_cur_bit == 8) { | |
- m_cur_bit = 0; | |
- ++m_cur_byte; | |
- } | |
- | |
- return result; | |
- } | |
- | |
- ~async_multifile_bit_stream_reader() { | |
- // Let the I/O thread know that we are done. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_finished = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- m_thread->join(); | |
- | |
- // Clean up. | |
- delete m_thread; | |
- free(m_active_buf); | |
- free(m_passive_buf); | |
- if (m_file) | |
- std::fclose(m_file); | |
- } | |
- | |
- static void async_io_code(async_multifile_bit_stream_reader *file) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(file->m_mutex); | |
- while (!(file->m_avail) && !(file->m_finished)) | |
- file->m_cv.wait(lk); | |
- | |
- if (!(file->m_avail) && (file->m_finished)) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- if (file->m_file == NULL) { | |
- // Find the next file to open. | |
- for (size_t j = 0; j < file->m_files_info.size(); ++j) | |
- if (file->m_files_info[j].m_beg == file->m_total_read_buf) { | |
- file->m_file_id = j; | |
- file->m_file = utils::open_file(file->m_files_info[j].m_filename, "r"); | |
- break; | |
- } | |
- } | |
- | |
- // If file ID was found, we perform the read. | |
- // Otherwise there is no more data to prefetch. | |
- if (file->m_file != NULL) { | |
- long file_left = file->m_files_info[file->m_file_id].m_end - file->m_total_read_buf; | |
- file->m_passive_buf_filled = std::min(file_left, 8L * (file->m_buf_size)); | |
- long toread_bytes = (file->m_passive_buf_filled + 7L) / 8L; | |
- utils::read_n_objects_from_file(file->m_passive_buf, toread_bytes, file->m_file); | |
- file->m_total_read_buf += file->m_passive_buf_filled; | |
- if (file->m_total_read_buf == file->m_files_info[file->m_file_id].m_end) { | |
- std::fclose(file->m_file); | |
- file->m_file = NULL; | |
- } | |
- } | |
- | |
- // Let the caller know that the I/O thread finished reading. | |
- lk.lock(); | |
- file->m_avail = false; | |
- lk.unlock(); | |
- file->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- void receive_new_buffer() { | |
- // Wait until the I/O thread finishes reading the previous | |
- // buffer. Most of the time this step is instantaneous. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_avail == true) | |
- m_cv.wait(lk); | |
- | |
- // Set the new active buffer. | |
- std::swap(m_active_buf, m_passive_buf); | |
- m_active_buf_filled = m_passive_buf_filled; | |
- m_active_buf_pos = 0; | |
- m_cur_byte = 0; | |
- m_cur_bit = 0; | |
- | |
- // Let the I/O thread know that it can now | |
- // prefetch another buffer. | |
- m_avail = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- } | |
- | |
-private: | |
- std::FILE *m_file; // file handler | |
- long m_total_read_buf; // total number of items read from files into buffers | |
- long m_file_id; | |
- std::vector<single_file_info> m_files_info; | |
- | |
- // Buffers used for asynchronous reading. | |
- unsigned char *m_active_buf; | |
- unsigned char *m_passive_buf; | |
- long m_buf_size; | |
- long m_active_buf_pos; | |
- long m_active_buf_filled; | |
- long m_passive_buf_filled; | |
- | |
- long m_cur_byte; | |
- long m_cur_bit; | |
- | |
- // For synchronization with thread doing asynchronous reading. | |
- std::thread *m_thread; | |
- std::mutex m_mutex; | |
- std::condition_variable m_cv; | |
- bool m_finished; | |
- bool m_avail; | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_ASYNC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_stream_writer.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_stream_writer.h | |
deleted file mode 100644 | |
index 3f7deb54..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_stream_writer.h | |
+++ /dev/null | |
@@ -1,168 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/async_stream_writer.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED | |
-#define __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template<typename value_type> | |
-struct async_stream_writer { | |
- template<typename T> | |
- static void io_thread_code(async_stream_writer<T> *writer) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(writer->m_mutex); | |
- while (!(writer->m_avail) && !(writer->m_finished)) | |
- writer->m_cv.wait(lk); | |
- | |
- if (!(writer->m_avail) && (writer->m_finished)) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- // Safely write the data to disk. | |
- utils::add_objects_to_file(writer->m_passive_buf, | |
- writer->m_passive_buf_filled, writer->m_file); | |
- | |
- // Let the caller know that the I/O thread finished writing. | |
- lk.lock(); | |
- writer->m_avail = false; | |
- lk.unlock(); | |
- writer->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- async_stream_writer(std::string filename, long bufsize = (4 << 20)) { | |
- m_file = utils::open_file(filename.c_str(), "w"); | |
- | |
- // Initialize buffers. | |
- long elems = std::max(2UL, (bufsize + sizeof(value_type) - 1) / sizeof(value_type)); | |
- m_buf_size = elems / 2; // both buffers are of the same size | |
- | |
- m_active_buf_filled = 0L; | |
- m_passive_buf_filled = 0L; | |
- | |
- m_active_buf = (value_type *)malloc(m_buf_size * sizeof(value_type)); | |
- m_passive_buf = (value_type *)malloc(m_buf_size * sizeof(value_type)); | |
- | |
- m_avail = false; | |
- m_finished = false; | |
- | |
- // Start the I/O thread. | |
- m_thread = new std::thread(io_thread_code<value_type>, this); | |
- } | |
- | |
- ~async_stream_writer() { | |
- // Write the partially filled active buffer to disk. | |
- if (m_active_buf_filled > 0L) | |
- send_active_buf_to_write(); | |
- | |
- // Let the I/O thread know that we're done. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_finished = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- m_thread->join(); | |
- | |
- // Clean up. | |
- delete m_thread; | |
- free(m_active_buf); | |
- free(m_passive_buf); | |
- std::fclose(m_file); | |
- } | |
- | |
- // Passes on the active buffer (full, unless it's the last one, | |
- // partially filled, buffer passed from destructor) to the I/O thread. | |
- void send_active_buf_to_write() { | |
- // Wait until the I/O thread finishes writing the previous buffer. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_avail == true) | |
- m_cv.wait(lk); | |
- | |
- // Set the new passive buffer. | |
- std::swap(m_active_buf, m_passive_buf); | |
- m_passive_buf_filled = m_active_buf_filled; | |
- m_active_buf_filled = 0L; | |
- | |
- // Let the I/O thread know that the buffer is waiting. | |
- m_avail = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- } | |
- | |
- inline void write(value_type x) { | |
- m_active_buf[m_active_buf_filled++] = x; | |
- | |
- // If the active buffer was full, send it to I/O thread. | |
- // This function may wait a bit until the I/O thread | |
- // finishes writing the previous passive buffer. | |
- if (m_active_buf_filled == m_buf_size) | |
- send_active_buf_to_write(); | |
- } | |
- | |
-private: | |
- value_type *m_active_buf; | |
- value_type *m_passive_buf; | |
- | |
- long m_buf_size; // size of each of the buffers | |
- long m_active_buf_filled; | |
- long m_passive_buf_filled; | |
- | |
- // Used for synchronization with the I/O thread. | |
- bool m_avail; // signals availability of buffer for I/O thread | |
- bool m_finished; // signals the end of writing | |
- std::mutex m_mutex; | |
- std::condition_variable m_cv; | |
- | |
- std::FILE *m_file; | |
- std::thread *m_thread; | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_vbyte_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_vbyte_stream_reader.h | |
deleted file mode 100644 | |
index 18bd0cb1..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_vbyte_stream_reader.h | |
+++ /dev/null | |
@@ -1,185 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/async_vbyte_stream_reader.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_ASYNC_VBYTE_STREAM_READER_H_INCLUDED | |
-#define __PSASCAN_SRC_ASYNC_VBYTE_STREAM_READER_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template<typename value_type> | |
-struct async_vbyte_stream_reader { | |
- static void io_thread_code(async_vbyte_stream_reader *reader) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(reader->m_mutex); | |
- while (!(reader->m_avail) && !(reader->m_finished)) | |
- reader->m_cv.wait(lk); | |
- | |
- if (!(reader->m_avail) && (reader->m_finished)) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- // Safely read the data from disk. | |
- long count = std::fread(reader->m_passive_buf, 1, reader->m_buf_size + 128, reader->m_file); | |
- if (count > reader->m_buf_size) { | |
- reader->m_passive_buf_filled = reader->m_buf_size; | |
- std::fseek(reader->m_file, reader->m_buf_size - count, SEEK_CUR); | |
- } else reader->m_passive_buf_filled = count; | |
- | |
- // Let the caller know that the I/O thread finished reading. | |
- lk.lock(); | |
- reader->m_avail = false; | |
- lk.unlock(); | |
- reader->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- async_vbyte_stream_reader(std::string filename, long bufsize = (4L << 20)) { | |
- m_file = utils::open_file(filename.c_str(), "r"); | |
- | |
- // Initialize buffers. | |
- long elems = std::max(4096L, bufsize); | |
- m_buf_size = elems / 2; | |
- | |
- m_active_buf_filled = 0L; | |
- m_passive_buf_filled = 0L; | |
- m_active_buf_pos = 0L; | |
- m_active_buf = (unsigned char *)malloc(m_buf_size + 128); | |
- m_passive_buf = (unsigned char *)malloc(m_buf_size + 128); | |
- | |
- m_finished = false; | |
- | |
- // Start the I/O thread and immediately start reading. | |
- m_avail = true; | |
- m_thread = new std::thread(io_thread_code, this); | |
- } | |
- | |
- ~async_vbyte_stream_reader() { | |
- // Let the I/O thread know that we're done. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_finished = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- m_thread->join(); | |
- | |
- // Clean up. | |
- delete m_thread; | |
- free(m_active_buf); | |
- free(m_passive_buf); | |
- std::fclose(m_file); | |
- } | |
- | |
- // This function checks if the reading thread has already | |
- // prefetched the next buffer (the request should have been | |
- // issued before), and waits in case the prefetching was not | |
- // completed yet. | |
- void receive_new_buffer(long skipped_bytes) { | |
- // Wait until the I/O thread finishes reading the previous | |
- // buffer. In most cases, this step is instantaneous. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_avail == true) | |
- m_cv.wait(lk); | |
- | |
- // Set the new active buffer. | |
- std::swap(m_active_buf, m_passive_buf); | |
- m_active_buf_filled = m_passive_buf_filled; | |
- m_active_buf_pos = skipped_bytes; | |
- | |
- // Let the I/O thread know that it can now prefetch | |
- // another buffer. | |
- m_avail = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- } | |
- | |
- inline value_type read() { | |
- if (m_active_buf_pos >= m_active_buf_filled) { | |
- // The active buffer run out of data. | |
- // At this point we need to swap it with the passive | |
- // buffer. The request to read that passive buffer should | |
- // have been scheduled long time ago, so hopefully the | |
- // buffer is now available. We check for that, but we | |
- // also might wait, if the reading has not yet been finished. | |
- // At this point we also already schedule the next read. | |
- receive_new_buffer(m_active_buf_pos - m_active_buf_filled); | |
- } | |
- | |
- value_type result = 0L; | |
- long offset = 0L; | |
- while (m_active_buf[m_active_buf_pos] & 0x80) { | |
- result |= (((value_type)m_active_buf[m_active_buf_pos++] & 0x7F) << offset); | |
- offset += 7; | |
- } | |
- result |= ((value_type)m_active_buf[m_active_buf_pos++] << offset); | |
- | |
- return result; | |
- } | |
- | |
-private: | |
- unsigned char *m_active_buf; | |
- unsigned char *m_passive_buf; | |
- | |
- long m_buf_size; | |
- long m_active_buf_pos; | |
- long m_active_buf_filled; | |
- long m_passive_buf_filled; | |
- | |
- // Used for synchronization with the I/O thread. | |
- std::mutex m_mutex; | |
- std::condition_variable m_cv; | |
- bool m_avail; | |
- bool m_finished; | |
- | |
- std::FILE *m_file; | |
- std::thread *m_thread; | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_ASYNC_VBYTE_STREAM_READER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/background_block_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/background_block_reader.h | |
deleted file mode 100644 | |
index 517bb68a..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/background_block_reader.h | |
+++ /dev/null | |
@@ -1,155 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/background_block_reader.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_BACKGROUND_BLOCK_READER_H_INCLUDED | |
-#define __PSASCAN_SRC_BACKGROUND_BLOCK_READER_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <algorithm> | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct background_block_reader { | |
- public: | |
- unsigned char *m_data; | |
- long m_start; | |
- long m_size; | |
- | |
- private: | |
- static const long k_chunk_size; | |
- | |
- // These variables are protected by m_mutex. | |
- long m_fetched; | |
- bool m_signal_stop; | |
- bool m_joined; | |
- | |
- std::mutex m_mutex; | |
- | |
- // This condition variable is used by the I/O thread to notify | |
- // the waiting threads when the next chunk is read. | |
- std::condition_variable m_cv; | |
- | |
- std::thread *m_thread; | |
- std::FILE *m_file; | |
- | |
- private: | |
- static void io_thread_main(background_block_reader &reader) { | |
- while (true) { | |
- std::unique_lock<std::mutex> lk(reader.m_mutex); | |
- long fetched = reader.m_fetched; | |
- bool signal_stop = reader.m_signal_stop; | |
- lk.unlock(); | |
- | |
- if (fetched == reader.m_size || signal_stop) break; | |
- | |
- long toread = std::min(reader.m_size - fetched, reader.k_chunk_size); | |
- unsigned char *dest = reader.m_data + fetched; | |
- utils::read_n_objects_from_file(dest, toread, reader.m_file); | |
- | |
- lk.lock(); | |
- reader.m_fetched += toread; | |
- lk.unlock(); | |
- reader.m_cv.notify_all(); | |
- } | |
- | |
- // Close the file and exit. | |
- std::fclose(reader.m_file); | |
- } | |
- | |
- public: | |
- background_block_reader(std::string filename, long start, long size) { | |
- m_start = start; | |
- m_size = size; | |
- | |
- // Initialize file and buffer. | |
- m_data = (unsigned char *)malloc(m_size); | |
- m_file = utils::open_file(filename, "r"); | |
- std::fseek(m_file, m_start, SEEK_SET); | |
- m_fetched = 0; | |
- | |
- // Start the I/O thread. | |
- m_signal_stop = false; | |
- m_joined = false; | |
- m_thread = new std::thread(io_thread_main, std::ref(*this)); | |
- } | |
- | |
- ~background_block_reader() { | |
- if (!m_joined) { | |
- fprintf(stderr, "\nError: the I/O thread is still not joined when " | |
- "destroying an object of backgroud_block_reader.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Note: m_file is already closed. | |
- delete m_thread; | |
- free(m_data); | |
- } | |
- | |
- inline void stop() { | |
- // Set the flag for the thread to stop. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_signal_stop = true; | |
- lk.unlock(); | |
- | |
- // Wait until the thread notices the flag and exits. Possibly the thread | |
- // is already not running, but in this case this call will do nothing. | |
- m_thread->join(); | |
- | |
- // To detect (in the destructor) if stop() was called. | |
- lk.lock(); | |
- m_joined = true; | |
- lk.unlock(); | |
- } | |
- | |
- inline void wait(long target_fetched) { | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_fetched < target_fetched) | |
- m_cv.wait(lk); | |
- lk.unlock(); | |
- } | |
-}; | |
- | |
-const long background_block_reader::k_chunk_size = (1L << 20); | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_BACKGROUND_BLOCK_READER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/background_chunk_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/background_chunk_reader.h | |
deleted file mode 100644 | |
index 4e7fce44..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/background_chunk_reader.h | |
+++ /dev/null | |
@@ -1,166 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/background_chunk_reader.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_BACKGROUND_CHUNK_READER_H_INCLUDED | |
-#define __PSASCAN_SRC_BACKGROUND_CHUNK_READER_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <string> | |
-#include <algorithm> | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct background_chunk_reader { | |
- private: | |
- std::FILE *m_file; | |
- long m_chunk_length; | |
- long m_end; | |
- | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- std::thread *m_thread; | |
- | |
- bool m_signal_read_next_chunk; | |
- bool m_signal_stop; | |
- | |
- long m_cur; | |
- unsigned char *m_passive_chunk; | |
- | |
- public: | |
- unsigned char *m_chunk; | |
- | |
- private: | |
- static void async_io_code(background_chunk_reader &r) { | |
- while (true) { | |
- std::unique_lock<std::mutex> lk(r.m_mutex); | |
- while (!r.m_signal_read_next_chunk && !r.m_signal_stop) | |
- r.m_cv.wait(lk); | |
- | |
- bool sig_stop = r.m_signal_stop; | |
- r.m_signal_read_next_chunk = false; | |
- lk.unlock(); | |
- | |
- if (sig_stop) break; | |
- | |
- long next_chunk_length = std::min(r.m_chunk_length, r.m_end - r.m_cur); | |
- utils::read_n_objects_from_file(r.m_passive_chunk, next_chunk_length, r.m_file); | |
- | |
- lk.lock(); | |
- r.m_cur += next_chunk_length; | |
- lk.unlock(); | |
- r.m_cv.notify_all(); | |
- } | |
- } | |
- | |
- public: | |
- background_chunk_reader(std::string filename, long beg, | |
- long end, long chunk_length = (1L << 20)) { | |
- if (beg > end) { | |
- fprintf(stderr, "Error: beg > end in background_chunk_reader.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- if (beg == end) return; | |
- | |
- m_cur = beg; | |
- m_end = end; | |
- | |
- m_chunk_length = chunk_length; | |
- m_chunk = (unsigned char *)malloc(m_chunk_length); | |
- m_passive_chunk = (unsigned char *)malloc(m_chunk_length); | |
- | |
- m_file = utils::open_file(filename, "r"); | |
- std::fseek(m_file, m_cur, SEEK_SET); | |
- | |
- m_signal_stop = false; | |
- m_signal_read_next_chunk = true; | |
- m_thread = new std::thread(async_io_code, std::ref(*this)); | |
- } | |
- | |
- inline void wait(long end) { | |
- if (end > m_end) { | |
- fprintf(stderr, "Error: end > m_end in background_chunk_reader.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_cur != end) | |
- m_cv.wait(lk); | |
- | |
- if (m_signal_read_next_chunk) { | |
- fprintf(stderr, "Error: m_signal_read_next_chunk in the wrong state.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- std::swap(m_chunk, m_passive_chunk); | |
- m_signal_read_next_chunk = true; | |
- | |
- lk.unlock(); | |
- m_cv.notify_all(); | |
- } | |
- | |
- ~background_chunk_reader() { | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_signal_stop = true; | |
- lk.unlock(); | |
- m_cv.notify_all(); | |
- | |
- // Wait until the thread notices the flag and exits. Possibly the thread | |
- // is already not running, but in this case this call will do nothing. | |
- m_thread->join(); | |
- | |
- std::fclose(m_file); | |
- | |
- // Clean up. | |
- delete m_thread; | |
- free(m_chunk); | |
- free(m_passive_chunk); | |
- } | |
- | |
- inline long get_chunk_size() const { | |
- return m_chunk_length; | |
- } | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_BACKGROUND_CHUNK_READER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/bitvector.h b/exttools/pSAscan-0.1.0/src/psascan_src/bitvector.h | |
deleted file mode 100644 | |
index 11498a28..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/bitvector.h | |
+++ /dev/null | |
@@ -1,111 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/bitvector.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_BITVECTOR_H_INCLUDED | |
-#define __PSASCAN_SRC_BITVECTOR_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <stdint.h> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct bitvector { | |
- private: | |
- long m_alloc_bytes; | |
- unsigned char *m_data; | |
- | |
- public: | |
- bitvector(std::string filename) { | |
- utils::read_objects_from_file<unsigned char>(m_data, m_alloc_bytes, filename); | |
- } | |
- | |
- bitvector(long length) { | |
- m_alloc_bytes = (length + 7) / 8; | |
- m_data = (unsigned char *)calloc(m_alloc_bytes, sizeof(unsigned char)); | |
- } | |
- | |
- inline bool get(long i) const { | |
- return m_data[i >> 3] & (1 << (i & 7)); | |
- } | |
- | |
- inline void set(long i) { | |
- m_data[i >> 3] |= (1 << (i & 7)); | |
- } | |
- | |
- inline void reset(long i) { | |
- m_data[i >> 3] &= (~(1 << (i & 7))); | |
- } | |
- | |
- inline void flip(long i) { | |
- if (get(i)) reset(i); | |
- else set(i); | |
- } | |
- | |
- inline void save(std::string filename) const { | |
- utils::write_objects_to_file<unsigned char>(m_data, m_alloc_bytes, filename); | |
- } | |
- | |
- // Number of 1 bits in the range [beg..end). | |
- long range_sum(long beg, long end) const { | |
- long result = 0L; | |
- | |
- long j = beg; | |
- while (j < end && (j & 63)) | |
- result += get(j++); | |
- | |
- uint64_t *ptr64 = (uint64_t *)(m_data + (j >> 3)); | |
- while (j + 64 <= end) { | |
- result += __builtin_popcountll(*ptr64++); | |
- j += 64; | |
- } | |
- | |
- while (j < end) | |
- result += get(j++); | |
- | |
- return result; | |
- } | |
- | |
- ~bitvector() { | |
- free(m_data); | |
- } | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_BITVECTOR_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/bwt_merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/bwt_merge.h | |
deleted file mode 100644 | |
index 437f2298..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/bwt_merge.h | |
+++ /dev/null | |
@@ -1,146 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/bwt_merge.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_BWT_MERGE_H_INCLUDED | |
-#define __PSASCAN_SRC_BWT_MERGE_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <algorithm> | |
- | |
-#include "bitvector.h" | |
-#include "ranksel_support.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-//============================================================================== | |
-// Compute bwt[beg..end). | |
-//============================================================================== | |
-void merge_bwt_aux(long beg, long end, long left_ptr, long right_ptr, | |
- const unsigned char *left_bwt, const unsigned char *right_bwt, unsigned char *bwt, | |
- const bitvector *bv) { | |
- for (long i = beg; i < end; ++i) { | |
- if (bv->get(i)) bwt[i] = right_bwt[right_ptr++]; | |
- else bwt[i] = left_bwt[left_ptr++]; | |
- } | |
-} | |
- | |
-void compute_initial_rank(long i, const ranksel_support *ranksel, long &result) { | |
- result = ranksel->rank(i); | |
-} | |
- | |
-//============================================================================== | |
-// Merge partial bwt of half-blocks (of size left_size and right_size) into | |
-// partial bwt of the whole block. | |
-//============================================================================== | |
-long merge_bwt(const unsigned char *left_bwt, const unsigned char *right_bwt, | |
- long left_size, long right_size, long left_block_i0, long right_block_i0, | |
- unsigned char left_block_last, unsigned char *bwt, const bitvector *bv, | |
- long max_threads) { | |
- long block_size = left_size + right_size; | |
- | |
- // 1 | |
- // | |
- // Initialize rank/select queries support for bv. | |
- ranksel_support *bv_ranksel = new ranksel_support(bv, block_size, max_threads); | |
- | |
- // 2 | |
- // | |
- // Compute range size. | |
- long max_range_size = (block_size + max_threads - 1) / max_threads; | |
- long n_ranges = (block_size + max_range_size - 1) / max_range_size; | |
- | |
- // 3 | |
- // | |
- // Compute starting parameters for each thread. | |
- long *left_ptr = new long[n_ranges]; | |
- long *right_ptr = new long[n_ranges]; | |
- long *rank_at_range_beg = new long[n_ranges]; | |
- | |
- std::thread **threads = new std::thread*[n_ranges]; | |
- for (long t = 0; t < n_ranges; ++t) { | |
- long range_beg = t * max_range_size; | |
- threads[t] = new std::thread(compute_initial_rank, | |
- range_beg, bv_ranksel, std::ref(rank_at_range_beg[t])); | |
- } | |
- | |
- for (long t = 0; t < n_ranges; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_ranges; ++t) delete threads[t]; | |
- | |
- for (long t = 0; t < n_ranges; ++t) { | |
- long range_beg = t * max_range_size; | |
- left_ptr[t] = range_beg - rank_at_range_beg[t]; | |
- right_ptr[t] = rank_at_range_beg[t]; | |
- } | |
- delete[] rank_at_range_beg; | |
- | |
- // 4 | |
- // | |
- // Merge BWTs in parallel. | |
- for (long t = 0; t < n_ranges; ++t) { | |
- long range_beg = max_range_size * t; | |
- long range_end = std::min(range_beg + max_range_size, block_size); | |
- | |
- threads[t] = new std::thread(merge_bwt_aux, range_beg, range_end, | |
- left_ptr[t], right_ptr[t], left_bwt, right_bwt, bwt, bv); | |
- } | |
- | |
- for (long t = 0; t < n_ranges; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_ranges; ++t) delete threads[t]; | |
- delete[] threads; | |
- delete[] left_ptr; | |
- delete[] right_ptr; | |
- | |
- // 5 | |
- // | |
- // Find position j = select_1(bv, right_block_i0) and replace bwt[j] with | |
- // left_block_last. To speed up the search for j, we use sparse_rank. | |
- bwt[bv_ranksel->select1(right_block_i0)] = left_block_last; | |
- | |
- // 6 | |
- // | |
- // Compute the returned value. | |
- long block_i0 = bv_ranksel->select0(left_block_i0); | |
- | |
- // 7 | |
- // | |
- // Clean up and exit. | |
- delete bv_ranksel; | |
- return block_i0; | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_BWT_MERGE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/compute_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/compute_gap.h | |
deleted file mode 100644 | |
index 683f0ae5..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/compute_gap.h | |
+++ /dev/null | |
@@ -1,163 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/compute_gap.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_COMPUTE_GAP_H_INCLUDED | |
-#define __PSASCAN_SRC_COMPUTE_GAP_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstring> | |
-#include <string> | |
-#include <thread> | |
-#include <algorithm> | |
-#include <vector> | |
- | |
-#include "utils.h" | |
-#include "rank.h" | |
-#include "gap_array.h" | |
-#include "gap_buffer.h" | |
-#include "stream.h" | |
-#include "update.h" | |
-#include "stream_info.h" | |
-#include "multifile.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-//============================================================================== | |
-// Compute the gap for an arbitrary range of suffixes of tail. This version is | |
-// more general, and can be used also when processing half-blocks. | |
-//============================================================================== | |
-template<typename block_offset_type> | |
-void compute_gap(const rank4n<> *rank, buffered_gap_array *gap, | |
- long tail_begin, long tail_end, long text_length, long max_threads, | |
- long block_isa0, long gap_buf_size, unsigned char block_last_symbol, | |
- std::vector<long> initial_ranks, std::string text_filename, std::string output_filename, | |
- const multifile *tail_gt_begin_rev, multifile *newtail_gt_begin_rev) { | |
- long tail_length = tail_end - tail_begin; | |
- long stream_max_block_size = (tail_length + max_threads - 1) / max_threads; | |
- long n_threads = (tail_length + stream_max_block_size - 1) / stream_max_block_size; | |
- | |
- fprintf(stderr, " Stream:"); | |
- long double stream_start = utils::wclock(); | |
- | |
- // 1 | |
- // | |
- // Get symbol counts of a block and turn into exclusive partial sum. | |
- long *count = new long[256]; | |
- std::copy(rank->m_count, rank->m_count + 256, count); | |
- ++count[block_last_symbol]; | |
- --count[0]; | |
- for (long j = 0, s = 0, t; j < 256; ++j) { | |
- t = count[j]; | |
- count[j] = s; | |
- s += t; | |
- } | |
- | |
- // 2 | |
- // | |
- // Allocate gap buffers. | |
- long n_gap_buffers = 2 * max_threads; | |
- gap_buffer<block_offset_type> **gap_buffers = new gap_buffer<block_offset_type>*[n_gap_buffers]; | |
- for (long i = 0L; i < n_gap_buffers; ++i) | |
- gap_buffers[i] = new gap_buffer<block_offset_type>(gap_buf_size, max_threads); | |
- | |
- // 3 | |
- // | |
- // Create poll of empty and full buffers. | |
- gap_buffer_poll<block_offset_type> *empty_gap_buffers = new gap_buffer_poll<block_offset_type>(); | |
- gap_buffer_poll<block_offset_type> *full_gap_buffers = new gap_buffer_poll<block_offset_type>(n_threads); | |
- | |
- // 4 | |
- // | |
- // Add all buffers to the poll of empty buffers. | |
- for (long i = 0L; i < n_gap_buffers; ++i) | |
- empty_gap_buffers->add(gap_buffers[i]); | |
- | |
- // 5 | |
- // | |
- // Start threads doing the backward search. | |
- stream_info info(n_threads, tail_length); | |
- std::thread **streamers = new std::thread*[n_threads]; | |
- std::vector<std::string> gt_filenames(n_threads); | |
- | |
- for (long t = 0L; t < n_threads; ++t) { | |
- long stream_block_beg = tail_begin + t * stream_max_block_size; | |
- long stream_block_end = std::min(stream_block_beg + stream_max_block_size, tail_end); | |
- | |
- gt_filenames[t] = output_filename + ".gt_tail." + utils::random_string_hash(); | |
- newtail_gt_begin_rev->add_file(text_length - stream_block_end, text_length - stream_block_beg, gt_filenames[t]); | |
- | |
- streamers[t] = new std::thread(parallel_stream<block_offset_type>, full_gap_buffers, empty_gap_buffers, stream_block_beg, | |
- stream_block_end, initial_ranks[t], count, block_isa0, rank, block_last_symbol, text_filename, text_length, | |
- std::ref(gt_filenames[t]), &info, t, gap->m_length, gap_buf_size, tail_gt_begin_rev, max_threads); | |
- } | |
- | |
- // 6 | |
- // | |
- // Start threads doing the gap array updates. | |
- std::thread *updater = new std::thread(gap_updater<block_offset_type>, | |
- full_gap_buffers, empty_gap_buffers, gap, max_threads); | |
- | |
- // 7 | |
- // | |
- // Wait for all threads to finish. | |
- for (long i = 0L; i < n_threads; ++i) streamers[i]->join(); | |
- updater->join(); | |
- | |
- // 8 | |
- // | |
- // Clean up. | |
- for (long i = 0L; i < n_threads; ++i) delete streamers[i]; | |
- for (long i = 0L; i < n_gap_buffers; ++i) delete gap_buffers[i]; | |
- delete updater; | |
- delete[] streamers; | |
- delete[] gap_buffers; | |
- delete empty_gap_buffers; | |
- delete full_gap_buffers; | |
- delete[] count; | |
- | |
- // 9 | |
- // | |
- // Print summary and exit. | |
- long double stream_time = utils::wclock() - stream_start; | |
- long double speed = (tail_length / (1024.L * 1024)) / stream_time; | |
- fprintf(stderr,"\r Stream: 100.0%%. Time: %.2Lfs. Speed: %.2LfMiB/s\n", | |
- stream_time, speed); | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_COMPUTE_GAP_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/compute_left_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/compute_left_gap.h | |
deleted file mode 100644 | |
index 4b07489e..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/compute_left_gap.h | |
+++ /dev/null | |
@@ -1,312 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/compute_left_gap.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_COMPUTE_LEFT_GAP_H_INCLUDED | |
-#define __PSASCAN_SRC_COMPUTE_LEFT_GAP_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "bitvector.h" | |
-#include "ranksel_support.h" | |
-#include "gap_array.h" | |
-#include "parallel_utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-//============================================================================== | |
-// Compute the range_gap values corresponging to bv[part_beg..part_end). | |
-//============================================================================== | |
-void lblock_handle_bv_part(long part_beg, long part_end, long range_beg, | |
- long *range_gap, const gap_array_2n *block_gap, const bitvector *bv, | |
- const ranksel_support *bv_ranksel, long &res_sum, long &res_rank) { | |
- size_t excess_ptr = std::lower_bound(block_gap->m_excess.begin(), | |
- block_gap->m_excess.end(), part_beg) - block_gap->m_excess.begin(); | |
- | |
- // Initialize j. | |
- long j = part_beg; | |
- | |
- // Compute gap[j]. | |
- long gap_j = block_gap->m_count[j]; | |
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) { | |
- ++excess_ptr; | |
- gap_j += (1L << 16); | |
- } | |
- | |
- // Initialize sum. | |
- long sum = gap_j + 1; | |
- | |
- while (j != part_end - 1 && bv->get(j) == 1) { | |
- // Update j. | |
- ++j; | |
- | |
- // Compute gap[j]. | |
- gap_j = block_gap->m_count[j]; | |
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) { | |
- ++excess_ptr; | |
- gap_j += (1L << 16); | |
- } | |
- | |
- // Update sum. | |
- sum += gap_j + 1; | |
- } | |
- if (bv->get(j) == 0) --sum; | |
- | |
- // Store gap[part_beg] + .. + gap[j] and bv.rank0(part_beg) (== bv.rank0(j)). | |
- res_sum = sum; | |
- res_rank = bv_ranksel->rank0(part_beg); | |
- | |
- if (j == part_end - 1) | |
- return; | |
- | |
- sum = 0L; | |
- long range_gap_ptr = res_rank + 1; | |
- while (j != part_end - 1) { | |
- // Update j. | |
- ++j; | |
- | |
- // Compute gap[j]. | |
- gap_j = block_gap->m_count[j]; | |
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) { | |
- ++excess_ptr; | |
- gap_j += (1L << 16); | |
- } | |
- | |
- // Update sum. | |
- sum += gap_j + 1; | |
- | |
- // Update range_gap. | |
- if (bv->get(j) == 0) { | |
- range_gap[range_gap_ptr - range_beg] = sum - 1; | |
- ++range_gap_ptr; | |
- sum = 0L; | |
- } | |
- } | |
- | |
- if (bv->get(j) == 1) | |
- range_gap[range_gap_ptr - range_beg] = sum; | |
-} | |
- | |
- | |
-void lblock_async_write_code(unsigned char* &slab, long &length, std::mutex &mtx, | |
- std::condition_variable &cv, bool &avail, bool &finished, std::string filename) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(mtx); | |
- while (!avail && !finished) | |
- cv.wait(lk); | |
- | |
- if (!avail && finished) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- // Safely write the data to disk. | |
- utils::add_objects_to_file(slab, length, filename); | |
- | |
- // Let the caller know that the I/O thread finished writing. | |
- lk.lock(); | |
- avail = false; | |
- lk.unlock(); | |
- cv.notify_one(); | |
- } | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Given the gap array of the block (representation using 2 bytes per elements) | |
-// and the gap array of the left half-block wrt right half-block (bitvector | |
-// representation), compute the gap array (wrt tail) of the left half-block | |
-// and write to a given file using v-byte encoding. | |
-// | |
-// The whole computation is performed under given ram budget. It is fully | |
-// parallelized and uses asynchronous I/O as much as possible. | |
-//============================================================================== | |
-void compute_left_gap(long left_block_size, long right_block_size, | |
- const gap_array_2n *block_gap, bitvector *bv, std::string out_filename, | |
- long max_threads, long ram_budget) { | |
- long block_size = left_block_size + right_block_size; | |
- long left_gap_size = left_block_size + 1; | |
- | |
- // NOTE: we require that bv has room for one extra bit at the end | |
- // which we use as a sentinel. The actual value of that bit | |
- // prior to calling this function does not matter. | |
- bv->reset(block_size); | |
- long bv_size = block_size + 1; | |
- | |
- fprintf(stderr, " Compute gap array for left half-block: "); | |
- long compute_gap_start = utils::wclock(); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Preprocess left_block_gap_bv for rank and select queries, | |
- // i.e., compute sparse_gap. | |
- //---------------------------------------------------------------------------- | |
- ranksel_support *bv_ranksel = new ranksel_support(bv, bv_size, max_threads); | |
- | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: compute the values of the right gap array, one range at a time. | |
- //---------------------------------------------------------------------------- | |
- long max_range_size = std::max(1L, ram_budget / (3L * (long)sizeof(long))); | |
- long n_ranges = (left_gap_size + max_range_size - 1) / max_range_size; | |
- | |
- // To ensure that asynchronous I/O is really taking | |
- // place, we try to make 8 parts. | |
- if (n_ranges < 8L) { | |
- max_range_size = (left_gap_size + 7L) / 8L; | |
- n_ranges = (left_gap_size + max_range_size - 1) / max_range_size; | |
- } | |
- | |
- long *range_gap = (long *)malloc(max_range_size * sizeof(long)); | |
- unsigned char *active_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long)); | |
- unsigned char *passive_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long)); | |
- long active_vbyte_slab_length; | |
- long passive_vbyte_slab_length; | |
- | |
- // Used for communication with thread doing asynchronous writes. | |
- std::mutex mtx; | |
- std::condition_variable cv; | |
- bool avail = false; | |
- bool finished = false; | |
- | |
- // Start the thread doing asynchronous writes. | |
- std::thread *async_writer = new std::thread(lblock_async_write_code, | |
- std::ref(passive_vbyte_slab), std::ref(passive_vbyte_slab_length), | |
- std::ref(mtx), std::ref(cv), std::ref(avail), std::ref(finished), | |
- out_filename); | |
- | |
- for (long range_id = 0L; range_id < n_ranges; ++range_id) { | |
- // Compute the range [range_beg..range_end) of values in the left gap | |
- // array (which is indexed [0..left_gap_size)). | |
- long range_beg = range_id * max_range_size; | |
- long range_end = std::min(range_beg + max_range_size, left_gap_size); | |
- long range_size = range_end - range_beg; | |
- | |
- // 2.a | |
- // | |
- // Find the section in the bitvector that contains | |
- // the bits necessary to compute the answer. | |
- long bv_section_beg = 0L; | |
- long bv_section_end = 0L; | |
- if (range_beg > 0) | |
- bv_section_beg = bv_ranksel->select0(range_beg - 1) + 1; | |
- bv_section_end = bv_ranksel->select0(range_end - 1) + 1; | |
- long bv_section_size = bv_section_end - bv_section_beg; | |
- | |
- // Split the current bitvector section into | |
- // equal parts. Each thread handles one part. | |
- long max_part_size = (bv_section_size + max_threads - 1) / max_threads; | |
- long n_parts = (bv_section_size + max_part_size - 1) / max_part_size; | |
- | |
- parallel_utils::parallel_fill<long>(range_gap, range_size, 0L, max_threads); | |
- | |
- // Allocate arrays used to store the answers for part boundaries. | |
- long *res_sum = new long[n_parts]; | |
- long *res_rank = new long[n_parts]; | |
- | |
- std::thread **threads = new std::thread*[n_parts]; | |
- for (long t = 0; t < n_parts; ++t) { | |
- long part_beg = bv_section_beg + t * max_part_size; | |
- long part_end = std::min(part_beg + max_part_size, bv_section_end); | |
- | |
- threads[t] = new std::thread(lblock_handle_bv_part, part_beg, part_end, range_beg, | |
- range_gap, block_gap, bv, bv_ranksel, std::ref(res_sum[t]), std::ref(res_rank[t])); | |
- } | |
- | |
- for (long t = 0; t < n_parts; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_parts; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- // Update range_gap with values computed at part boundaries. | |
- for (long t = 0; t < n_parts; ++t) | |
- range_gap[res_rank[t] - range_beg] += res_sum[t]; | |
- delete[] res_sum; | |
- delete[] res_rank; | |
- | |
- // 2.c | |
- // | |
- // Convert the range_gap to the slab of vbyte encoding. | |
- active_vbyte_slab_length = parallel_utils::convert_array_to_vbyte_slab( | |
- range_gap, range_size, active_vbyte_slab, max_threads); | |
- | |
- // 2.d | |
- // | |
- // Schedule asynchronous write of the slab. | |
- // First, wait for the I/O thread to finish writing. | |
- std::unique_lock<std::mutex> lk(mtx); | |
- while (avail == true) | |
- cv.wait(lk); | |
- | |
- // Set the new passive slab. | |
- std::swap(active_vbyte_slab, passive_vbyte_slab); | |
- passive_vbyte_slab_length = active_vbyte_slab_length; | |
- | |
- // Let the I/O thread know that the slab is waiting. | |
- avail = true; | |
- lk.unlock(); | |
- cv.notify_one(); | |
- } | |
- | |
- // Let the I/O thread know that we're done. | |
- std::unique_lock<std::mutex> lk(mtx); | |
- finished = true; | |
- lk.unlock(); | |
- cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- async_writer->join(); | |
- | |
- // Clean up. | |
- delete async_writer; | |
- delete bv_ranksel; | |
- free(range_gap); | |
- free(active_vbyte_slab); | |
- free(passive_vbyte_slab); | |
- | |
- long double compute_gap_time = utils::wclock() - compute_gap_start; | |
- long double compute_gap_speed = (block_size / (1024.L * 1024)) / compute_gap_time; | |
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", compute_gap_time, compute_gap_speed); | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_COMPUTE_LEFT_GAP_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/compute_right_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/compute_right_gap.h | |
deleted file mode 100644 | |
index 9b28d7fb..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/compute_right_gap.h | |
+++ /dev/null | |
@@ -1,311 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/compute_right_gap.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_COMPUTE_RIGHT_GAP_H_INCLUDED | |
-#define __PSASCAN_SRC_COMPUTE_RIGHT_GAP_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "bitvector.h" | |
-#include "ranksel_support.h" | |
-#include "gap_array.h" | |
-#include "parallel_utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-//============================================================================== | |
-// Compute the range_gap values corresponging to bv[part_beg..part_end). | |
-//============================================================================== | |
-void rblock_handle_bv_part(long part_beg, long part_end, long range_beg, | |
- long *range_gap, const gap_array_2n *block_gap, const bitvector *bv, | |
- const ranksel_support *bv_ranksel, long &res_sum, long &res_rank) { | |
- size_t excess_ptr = std::lower_bound(block_gap->m_excess.begin(), | |
- block_gap->m_excess.end(), part_beg) - block_gap->m_excess.begin(); | |
- | |
- // Initialize j. | |
- long j = part_beg; | |
- | |
- // Compute gap[j]. | |
- long gap_j = block_gap->m_count[j]; | |
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) { | |
- ++excess_ptr; | |
- gap_j += (1L << 16); | |
- } | |
- | |
- // Initialize sum. | |
- long sum = gap_j; | |
- | |
- while (j != part_end - 1 && bv->get(j) == 0) { | |
- // Update j. | |
- ++j; | |
- | |
- // Compute gap[j]. | |
- gap_j = block_gap->m_count[j]; | |
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) { | |
- ++excess_ptr; | |
- gap_j += (1L << 16); | |
- } | |
- | |
- // Update sum. | |
- sum += gap_j; | |
- } | |
- | |
- // Store gap[part_beg] + .. + gap[j] and bv.rank(part_beg) (== bv.rank(j)). | |
- res_sum = sum; | |
- res_rank = bv_ranksel->rank(part_beg); | |
- | |
- if (j == part_end - 1) | |
- return; | |
- | |
- sum = 0L; | |
- long range_gap_ptr = res_rank + 1; | |
- while (j != part_end - 1) { | |
- // Update j. | |
- ++j; | |
- | |
- // Compute gap[j]. | |
- gap_j = block_gap->m_count[j]; | |
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) { | |
- ++excess_ptr; | |
- gap_j += (1L << 16); | |
- } | |
- | |
- // Update sum. | |
- sum += gap_j; | |
- | |
- // Update range_gap. | |
- if (bv->get(j) == 1) { | |
- range_gap[range_gap_ptr - range_beg] = sum; | |
- ++range_gap_ptr; | |
- sum = 0L; | |
- } | |
- } | |
- | |
- if (bv->get(j) == 0) | |
- range_gap[range_gap_ptr - range_beg] = sum; | |
-} | |
- | |
- | |
-void rblock_async_write_code(unsigned char* &slab, long &length, std::mutex &mtx, | |
- std::condition_variable &cv, bool &avail, bool &finished, std::string filename) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(mtx); | |
- while (!avail && !finished) | |
- cv.wait(lk); | |
- | |
- if (!avail && finished) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- // Safely write the data to disk. | |
- utils::add_objects_to_file(slab, length, filename); | |
- | |
- // Let the caller know that the I/O thread finished writing. | |
- lk.lock(); | |
- avail = false; | |
- lk.unlock(); | |
- cv.notify_one(); | |
- } | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Given the gap array of the block (representation using 2 bytes per elements) | |
-// and the gap array of the left half-block wrt right half-block (bitvector | |
-// representation), compute the gap array (wrt tail) of the right half-block | |
-// and write to a given file using v-byte encoding. | |
-// | |
-// The whole computation is performed under given ram budget. It is fully | |
-// parallelized and uses asynchronous I/O as much as possible. | |
-//============================================================================== | |
-void compute_right_gap(long left_block_size, long right_block_size, | |
- const gap_array_2n *block_gap, bitvector *bv, std::string out_filename, | |
- long max_threads, long ram_budget) { | |
- long block_size = left_block_size + right_block_size; | |
- long right_gap_size = right_block_size + 1; | |
- | |
- // NOTE: we require that bv has room for one extra bit at the end | |
- // which we use as a sentinel. The actual value of that bit | |
- // prior to calling this function does not matter. | |
- bv->set(block_size); | |
- long bv_size = block_size + 1; | |
- | |
- fprintf(stderr, " Compute gap array for right half-block: "); | |
- long compute_gap_start = utils::wclock(); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Preprocess left_block_gap_bv for rank and select queries, | |
- // i.e., compute sparse_gap. | |
- //---------------------------------------------------------------------------- | |
- ranksel_support *bv_ranksel = new ranksel_support(bv, bv_size, max_threads); | |
- | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: compute the values of the right gap array, one range at a time. | |
- //---------------------------------------------------------------------------- | |
- long max_range_size = std::max(1L, ram_budget / (3L * (long)sizeof(long))); | |
- long n_ranges = (right_gap_size + max_range_size - 1) / max_range_size; | |
- | |
- // To ensure that asynchronous I/O is really taking | |
- // place, we try to make 8 parts. | |
- if (n_ranges < 8L) { | |
- max_range_size = (right_gap_size + 7L) / 8L; | |
- n_ranges = (right_gap_size + max_range_size - 1) / max_range_size; | |
- } | |
- | |
- long *range_gap = (long *)malloc(max_range_size * sizeof(long)); | |
- unsigned char *active_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long)); | |
- unsigned char *passive_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long)); | |
- long active_vbyte_slab_length; | |
- long passive_vbyte_slab_length; | |
- | |
- // Used for communication with thread doing asynchronous writes. | |
- std::mutex mtx; | |
- std::condition_variable cv; | |
- bool avail = false; | |
- bool finished = false; | |
- | |
- // Start the thread doing asynchronous writes. | |
- std::thread *async_writer = new std::thread(rblock_async_write_code, | |
- std::ref(passive_vbyte_slab), std::ref(passive_vbyte_slab_length), | |
- std::ref(mtx), std::ref(cv), std::ref(avail), std::ref(finished), | |
- out_filename); | |
- | |
- for (long range_id = 0L; range_id < n_ranges; ++range_id) { | |
- // Compute the range [range_beg..range_end) of values in the right gap | |
- // array (which is indexed [0..right_gap_size)). | |
- long range_beg = range_id * max_range_size; | |
- long range_end = std::min(range_beg + max_range_size, right_gap_size); | |
- long range_size = range_end - range_beg; | |
- | |
- // 2.a | |
- // | |
- // Find the section in the bitvector that contains | |
- // the bits necessary to compute the answer. | |
- long bv_section_beg = 0L; | |
- long bv_section_end = 0L; | |
- if (range_beg > 0) | |
- bv_section_beg = bv_ranksel->select1(range_beg - 1) + 1; | |
- bv_section_end = bv_ranksel->select1(range_end - 1) + 1; | |
- long bv_section_size = bv_section_end - bv_section_beg; | |
- | |
- // Split the current bitvector section into | |
- // equal parts. Each thread handles one part. | |
- long max_part_size = (bv_section_size + max_threads - 1) / max_threads; | |
- long n_parts = (bv_section_size + max_part_size - 1) / max_part_size; | |
- | |
- parallel_utils::parallel_fill<long>(range_gap, range_size, 0L, max_threads); | |
- | |
- // Allocate arrays used to store the answers for part boundaries. | |
- long *res_sum = new long[n_parts]; | |
- long *res_rank = new long[n_parts]; | |
- | |
- std::thread **threads = new std::thread*[n_parts]; | |
- for (long t = 0; t < n_parts; ++t) { | |
- long part_beg = bv_section_beg + t * max_part_size; | |
- long part_end = std::min(part_beg + max_part_size, bv_section_end); | |
- | |
- threads[t] = new std::thread(rblock_handle_bv_part, part_beg, part_end, range_beg, | |
- range_gap, block_gap, bv, bv_ranksel, std::ref(res_sum[t]), std::ref(res_rank[t])); | |
- } | |
- | |
- for (long t = 0; t < n_parts; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_parts; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- // Update range_gap with values computed at part boundaries. | |
- for (long t = 0; t < n_parts; ++t) | |
- range_gap[res_rank[t] - range_beg] += res_sum[t]; | |
- delete[] res_sum; | |
- delete[] res_rank; | |
- | |
- // 2.c | |
- // | |
- // Convert the range_gap to the slab of vbyte encoding. | |
- active_vbyte_slab_length = parallel_utils::convert_array_to_vbyte_slab( | |
- range_gap, range_size, active_vbyte_slab, max_threads); | |
- | |
- // 2.d | |
- // | |
- // Schedule asynchronous write of the slab. | |
- // First, wait for the I/O thread to finish writing. | |
- std::unique_lock<std::mutex> lk(mtx); | |
- while (avail == true) | |
- cv.wait(lk); | |
- | |
- // Set the new passive slab. | |
- std::swap(active_vbyte_slab, passive_vbyte_slab); | |
- passive_vbyte_slab_length = active_vbyte_slab_length; | |
- | |
- // Let the I/O thread know that the slab is waiting. | |
- avail = true; | |
- lk.unlock(); | |
- cv.notify_one(); | |
- } | |
- | |
- // Let the I/O thread know that we're done. | |
- std::unique_lock<std::mutex> lk(mtx); | |
- finished = true; | |
- lk.unlock(); | |
- cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- async_writer->join(); | |
- | |
- // Clean up. | |
- delete async_writer; | |
- delete bv_ranksel; | |
- free(range_gap); | |
- free(active_vbyte_slab); | |
- free(passive_vbyte_slab); | |
- | |
- long double compute_gap_time = utils::wclock() - compute_gap_start; | |
- long double compute_gap_speed = (block_size / (1024.L * 1024)) / compute_gap_time; | |
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", compute_gap_time, compute_gap_speed); | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_COMPUTE_RIGHT_GAP_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/distributed_file.h b/exttools/pSAscan-0.1.0/src/psascan_src/distributed_file.h | |
deleted file mode 100644 | |
index 6d77cecd..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/distributed_file.h | |
+++ /dev/null | |
@@ -1,360 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/distributed_file.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_DISTRIBUTED_FILE_H_INCLUDED | |
-#define __PSASCAN_SRC_DISTRIBUTED_FILE_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <string> | |
-#include <thread> | |
-#include <mutex> | |
-#include <algorithm> | |
-#include <condition_variable> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template<typename value_type> | |
-struct distributed_file { | |
- distributed_file(std::string filename_base, long max_bytes) { | |
- m_state = STATE_INIT; | |
- m_max_items = std::max(1UL, max_bytes / sizeof(value_type)); | |
- m_filename = filename_base + ".distrfile." + utils::random_string_hash(); | |
- } | |
- | |
- distributed_file(std::string filename_base, long max_bytes, | |
- const value_type *begin, const value_type *end) { | |
- m_state = STATE_INIT; | |
- m_max_items = std::max(1UL, max_bytes / sizeof(value_type)); | |
- m_filename = filename_base + ".distrfile." + utils::random_string_hash(); | |
- | |
- initialize_writing(); | |
- write(begin, end); | |
- finish_writing(); | |
- } | |
- | |
- | |
- void initialize_writing() { | |
- if (m_state != STATE_INIT) { | |
- fprintf(stderr, "\nError: initializing writing in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- m_state = STATE_WRITING; | |
- m_total_write = 0; | |
- m_files_cnt = 0; | |
- make_new_file(); | |
- } | |
- | |
- void write(const value_type *begin, const value_type *end) { | |
- if (m_state != STATE_WRITING) { | |
- fprintf(stderr, "\nError: write in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Fill the current file. | |
- if (m_cur_file_write != m_max_items) { | |
- long left = m_max_items - m_cur_file_write; | |
- long towrite = std::min(left, end - begin); | |
- utils::add_objects_to_file(begin, towrite, m_file); | |
- m_cur_file_write += towrite; | |
- m_total_write += towrite; | |
- begin += towrite; | |
- } | |
- | |
- // Write remaining items. | |
- while (begin < end) { | |
- std::fclose(m_file); | |
- make_new_file(); | |
- | |
- long towrite = std::min(m_max_items, end - begin); | |
- utils::add_objects_to_file(begin, towrite, m_file); | |
- m_cur_file_write += towrite; | |
- m_total_write += towrite; | |
- begin += towrite; | |
- } | |
- } | |
- | |
- void finish_writing() { | |
- if (m_state != STATE_WRITING) { | |
- fprintf(stderr, "\nError: finishing writing when in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- if (m_cur_file_write == 0) { | |
- fprintf(stderr, "\nError: nothing was ever written to %s\n", m_filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- std::fclose(m_file); | |
- m_state = STATE_WRITTEN; | |
- } | |
- | |
- void initialize_reading(long bufsize = (4 << 20)) { | |
- if (m_state != STATE_WRITTEN) { | |
- fprintf(stderr, "\nError: initializing reading in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Compute buffer size. | |
- m_state = STATE_READING; | |
- long items = std::max(2UL, (bufsize + sizeof(value_type) - 1) / sizeof(value_type)); | |
- m_buf_size = items / 2L; | |
- | |
- // Reset counters. | |
- m_active_buf_filled = 0; | |
- m_passive_buf_filled = 0; | |
- m_active_buf_pos = 0; | |
- m_total_read_buf = 0; | |
- m_total_read_user = 0; | |
- m_cur_file = -1; | |
- | |
- // Initialize buffers. | |
- m_active_buf = (value_type *)malloc(m_buf_size * sizeof(value_type)); | |
- m_passive_buf = (value_type *)malloc(m_buf_size * sizeof(value_type)); | |
- | |
- // Start the I/O thread and immediatelly start reading. | |
- m_avail = true; | |
- m_finished = false; | |
- m_thread = new std::thread(async_io_code<value_type>, this); | |
- } | |
- | |
- inline value_type read() { | |
- if (m_state != STATE_READING) { | |
- fprintf(stderr, "\nError: reading in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- if (m_active_buf_pos == m_active_buf_filled) | |
- receive_new_buffer(); | |
- | |
- m_total_read_user++; | |
- return m_active_buf[m_active_buf_pos++]; | |
- } | |
- | |
- void finish_reading() { | |
- if (m_state != STATE_READING) { | |
- fprintf(stderr, "\nError: finishing reading in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- if (m_total_read_buf != m_total_read_user || m_total_read_user != m_total_write) { | |
- fprintf(stderr, "\nError: not all elems were read from distributed file %s\n", m_filename.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Let the I/O thread know that we are done. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- m_finished = true; | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- | |
- // Wait for the thread to finish. | |
- m_thread->join(); | |
- | |
- // Clean up. | |
- delete m_thread; | |
- close_and_destroy_cur_file(); | |
- free(m_active_buf); | |
- free(m_passive_buf); | |
- | |
- // Enter the terminal state. | |
- m_state = STATE_READ; | |
- } | |
- | |
- std::string state_string() const { | |
- switch(m_state) { | |
- case STATE_INIT: return "STATE_INIT"; | |
- case STATE_WRITING: return "STATE_WRITING"; | |
- case STATE_WRITTEN: return "STATE_WRITTEN"; | |
- case STATE_READING: return "STATE_READING"; | |
- case STATE_READ: return "STATE_READ"; | |
- default: return "undefined state"; | |
- } | |
- } | |
- | |
- void close_and_destroy_cur_file() { | |
- if (m_state != STATE_READING) { | |
- fprintf(stderr, "\nError: destroying a file in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- if (!m_file) { | |
- fprintf(stderr, "\nError: deleting a NULL file\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- std::fclose(m_file); | |
- std::string cur_fname = m_filename + ".part" + utils::intToStr(m_cur_file); | |
- utils::file_delete(cur_fname); | |
- } | |
- | |
- template<typename T> | |
- static void async_io_code(distributed_file<T> *file) { | |
- while (true) { | |
- // Wait until the passive buffer is available. | |
- std::unique_lock<std::mutex> lk(file->m_mutex); | |
- while (!(file->m_avail) && !(file->m_finished)) | |
- file->m_cv.wait(lk); | |
- | |
- if (!(file->m_avail) && (file->m_finished)) { | |
- // We're done, terminate the thread. | |
- lk.unlock(); | |
- return; | |
- } | |
- lk.unlock(); | |
- | |
- // This should never happen. | |
- if (file->m_total_read_buf == file->m_total_write) { | |
- fprintf(stderr, "\nError: trying to read past the end of file\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Safely process the passive buffer. | |
- // Check if we need to open next file. | |
- if (file->m_cur_file == -1 || file->m_cur_file_read == file->m_max_items) { | |
- if (file->m_cur_file != -1) | |
- file->close_and_destroy_cur_file(); | |
- file->open_next_file(); | |
- } | |
- | |
- // Read the data from disk. | |
- long file_left = file->m_max_items - file->m_cur_file_read; | |
- long items_left = file->m_total_write - file->m_total_read_buf; | |
- long left = std::min(file_left, items_left); | |
- file->m_passive_buf_filled = std::min(left, file->m_buf_size); | |
- file->m_cur_file_read += file->m_passive_buf_filled; | |
- file->m_total_read_buf += file->m_passive_buf_filled; | |
- utils::read_n_objects_from_file(file->m_passive_buf, | |
- file->m_passive_buf_filled, file->m_file); | |
- | |
- // Let the caller know that the I/O thread finished reading. | |
- lk.lock(); | |
- file->m_avail = false; | |
- lk.unlock(); | |
- file->m_cv.notify_one(); | |
- } | |
- } | |
- | |
- void receive_new_buffer() { | |
- if (m_state != STATE_READING) { | |
- fprintf(stderr, "\nError: refilling in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Wait until the I/O thread finishes reading the revious | |
- // buffer. Most of the time this step is instantaneous. | |
- std::unique_lock<std::mutex> lk(m_mutex); | |
- while (m_avail == true) | |
- m_cv.wait(lk); | |
- | |
- // Set the new active buffer. | |
- std::swap(m_active_buf, m_passive_buf); | |
- m_active_buf_filled = m_passive_buf_filled; | |
- m_active_buf_pos = 0; | |
- | |
- // Let the I/O thead know that it can now | |
- // prefetch another buffer. | |
- m_avail = (m_total_read_buf < m_total_write); | |
- lk.unlock(); | |
- m_cv.notify_one(); | |
- } | |
- | |
- void open_next_file() { | |
- if (m_state != STATE_READING) { | |
- fprintf(stderr, "\nError: opening a new file in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- ++m_cur_file; | |
- m_file = utils::open_file(m_filename + ".part" + utils::intToStr(m_cur_file), "r"); | |
- m_cur_file_read = 0; | |
- } | |
- | |
- void make_new_file() { | |
- if (m_state != STATE_WRITING) { | |
- fprintf(stderr, "\nError: making new file in state %s\n", state_string().c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- m_file = utils::open_file(m_filename + ".part" + utils::intToStr(m_files_cnt), "w"); | |
- ++m_files_cnt; | |
- m_cur_file_write = 0; | |
- } | |
- | |
- | |
- enum { STATE_INIT, // right after creating (before init_writing) | |
- STATE_WRITING, // after initialize_writing, writing possible | |
- STATE_WRITTEN, // after finish_writing, waiting for initialize_reading | |
- STATE_READING, // after initialize_reading, reading possible | |
- STATE_READ // after finish_reading, waiting for death | |
- } m_state; | |
- | |
- std::FILE *m_file; // file handler | |
- std::string m_filename; // file name base | |
- long m_max_items; // max items per file | |
- | |
- // Buffers used for asynchronous reading. | |
- value_type *m_active_buf; | |
- value_type *m_passive_buf; | |
- long m_buf_size; | |
- long m_active_buf_pos; | |
- long m_active_buf_filled; | |
- long m_passive_buf_filled; | |
- | |
- // Various housekeeping statistics about the number of items. | |
- long m_cur_file_write; // number of items written to a current file | |
- long m_total_write; // total number of written items | |
- long m_cur_file_read; // number of items read from the current file | |
- long m_total_read_buf; // total number of items read from files into buffers | |
- long m_total_read_user; // total number of items read by the user | |
- | |
- // Used to keep track of file count. | |
- long m_files_cnt; // counts the files during writing | |
- long m_cur_file; // iterates through [0..m_files_cnt) during reading | |
- | |
- // For synchronization with thread doing asynchronous reading. | |
- std::thread *m_thread; | |
- std::mutex m_mutex; | |
- std::condition_variable m_cv; | |
- bool m_finished; | |
- bool m_avail; | |
-}; | |
- | |
-} // psascan_private | |
- | |
-#endif // __PSASCAN_SRC_DISTRIBUTED_FILE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/em_compute_initial_ranks.h b/exttools/pSAscan-0.1.0/src/psascan_src/em_compute_initial_ranks.h | |
deleted file mode 100644 | |
index 5ef2858e..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/em_compute_initial_ranks.h | |
+++ /dev/null | |
@@ -1,567 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/em_compute_initial_ranks.h | |
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_EM_COMPUTE_INITIAL_RANKS_INCLUDED | |
-#define __PSASCAN_SRC_EM_COMPUTE_INITIAL_RANKS_INCLUDED | |
- | |
-#include <string> | |
-#include <vector> | |
-#include <algorithm> | |
-#include <thread> | |
- | |
-#include "approx_rank.h" | |
-#include "sparse_isa.h" | |
-#include "background_block_reader.h" | |
-#include "background_chunk_reader.h" | |
-#include "multifile_bit_stream_reader.h" | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-// #define EM_STARTING_POS_MODULE_DEBUG_MODE | |
- | |
-inline int lcp_compare( | |
- const unsigned char *text, // only text[block_suf_beg..block_end) will be accessed | |
- long text_length, | |
- long block_end, // wrt to text beg | |
- long block_suf_beg, // wrt to text beg | |
- const unsigned char *pat, // only pat[lcp..pat_length) will be accessed | |
- long pat_beg, // wrt to text beg | |
- long pat_length, | |
- multifile_bit_stream_reader >_reader, | |
- long &lcp) { | |
- while (block_suf_beg + lcp < block_end && lcp < pat_length && | |
- text[block_suf_beg + lcp] == pat[lcp]) ++lcp; | |
- if (block_suf_beg + lcp >= block_end) { | |
- if (gt_reader.access(text_length - (pat_beg + (block_end - block_suf_beg)))) return 1; | |
- else return -1; | |
- } else if (lcp == pat_length) { | |
- if (pat_beg + pat_length >= text_length) return -1; | |
- else return 0; | |
- } else { | |
- if (pat[lcp] > text[block_suf_beg + lcp]) return 1; | |
- else return -1; | |
- } | |
-} | |
- | |
-template<typename saidx_t> | |
-void refine_range( | |
- const unsigned char *block, | |
- const saidx_t *block_psa, | |
- long block_beg, // wrt to text beg | |
- long block_end, // same here | |
- long pat_beg, // same here | |
- long text_length, | |
- long left, | |
- long right, | |
- long old_lcp, | |
- long new_lcp, | |
- const unsigned char *pat, // only pat[old_lcp..new_lcp) can and will be accessed | |
- multifile_bit_stream_reader >_reader, | |
- long &newleft, | |
- long &newright) { | |
- long low = left - 1; | |
- long high = right; | |
- long llcp = old_lcp; | |
- long rlcp = old_lcp; | |
- | |
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE | |
- long min_discrepancy = utils::random_long(0L, 10L); | |
- long balancing_factor = utils::random_long(1L, 10L); | |
-#else | |
- static const long min_discrepancy = (1L << 16); | |
- static const long balancing_factor = 64L; | |
-#endif | |
- | |
- const unsigned char *text = block - block_beg; | |
- while (low + 1 != high) { | |
- // Invariant: newleft is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, text_length, block_end, block_beg + (long)block_psa[mid], | |
- pat, pat_beg, new_lcp, gt_reader, lcp) <= 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- newleft = high; | |
- | |
- if (rlcp >= new_lcp) { | |
- high = right; | |
- rlcp = old_lcp; | |
- | |
- while (low + 1 != high) { | |
- // Invariant: newright is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, text_length, block_end, block_beg + (long)block_psa[mid], | |
- pat, pat_beg, new_lcp, gt_reader, lcp) < 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- } | |
- newright = high; | |
-} | |
- | |
-template<typename saidx_t> | |
-void em_compute_single_initial_rank( | |
- const unsigned char *block, | |
- const saidx_t *block_psa, | |
- long block_beg, // wrt to text beg | |
- long block_end, // same here | |
- long pat_beg, // same here | |
- long text_length, | |
- long max_lcp, | |
- std::string text_filename, | |
- const multifile *tail_gt_begin_reversed, | |
- std::pair<long, long> &result) { | |
- if (pat_beg == text_length) { | |
- result = std::make_pair(0, 0); | |
- return; | |
- } | |
- | |
- long block_size = block_end - block_beg; | |
- long pat_end = pat_beg + max_lcp; | |
- | |
- multifile_bit_stream_reader gt_reader(tail_gt_begin_reversed); | |
- | |
- // Reads text[pat_beg..pat_end) in chunks. | |
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE | |
- long chunk_length = utils::random_long(1L, 10L); | |
- background_chunk_reader *chunk_reader = | |
- new background_chunk_reader(text_filename, pat_beg, pat_end, chunk_length); | |
-#else | |
- background_chunk_reader *chunk_reader = | |
- new background_chunk_reader(text_filename, pat_beg, pat_end); | |
-#endif | |
- | |
- // The current range is [left, right). | |
- long left = 0; | |
- long right = block_size; | |
- long lcp = 0; | |
- | |
- while (left != right && lcp < max_lcp) { | |
- long this_chunk_length = std::min(max_lcp - lcp, chunk_reader->get_chunk_size()); | |
- long new_lcp = lcp + this_chunk_length; | |
- chunk_reader->wait(pat_beg + new_lcp); | |
- | |
- // Invariant: | |
- // reader->chunk[0..chunk_length) = pattern[lcp..new_lcp). | |
- long newleft = 0; | |
- long newright = 0; | |
- refine_range(block, block_psa, block_beg, block_end, pat_beg, text_length, left, | |
- right, lcp, new_lcp, chunk_reader->m_chunk - lcp, gt_reader, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- lcp = new_lcp; | |
- } | |
- | |
- delete chunk_reader; | |
- | |
- result = std::make_pair(left, right); | |
-} | |
- | |
-template<typename saidx_t> | |
-void em_compute_initial_ranks( | |
- const unsigned char *block, | |
- const saidx_t *block_psa, | |
- const unsigned char *block_pbwt, | |
- long i0, | |
- long block_beg, // wrt to text beg | |
- long block_end, // same here | |
- long text_length, | |
- std::string text_filename, | |
- const multifile *tail_gt_begin_reversed, | |
- std::vector<long> &result, | |
- long max_threads, | |
- long tail_end, | |
- long initial_rank_after_tail) { | |
- // Note, that bits of tail_gt_begin_reversed are indexed in the | |
- // range [text_length - tail_end.. text_length - block_end). This | |
- // is because the same multifile is then used in the streaming and | |
- // for streaming is much more natural to use this indexing. | |
- long block_length = block_end - block_beg; | |
- long tail_length = tail_end - block_end; | |
- long stream_max_block_size = (tail_length + max_threads - 1) / max_threads; | |
- long n_threads = (tail_length + stream_max_block_size - 1) / stream_max_block_size; | |
- | |
- std::vector<std::pair<long, long> > ranges(n_threads); | |
- std::thread **threads = new std::thread*[n_threads]; | |
- | |
- for (int t = n_threads - 1; t >= 0; --t) { | |
- long stream_block_beg = block_end + t * stream_max_block_size; | |
- long stream_block_end = std::min(stream_block_beg + stream_max_block_size, tail_end); | |
- long stream_block_size = stream_block_end - stream_block_beg; | |
- | |
- threads[t] = new std::thread(em_compute_single_initial_rank<saidx_t>, | |
- block, block_psa, block_beg, block_end, stream_block_beg, text_length, | |
- stream_block_size, text_filename, tail_gt_begin_reversed, std::ref(ranges[t])); | |
- } | |
- | |
- for (int t = 0; t < n_threads; ++t) threads[t]->join(); | |
- for (int t = 0; t < n_threads; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- // Refine ranges until all are single elements. | |
- result.resize(n_threads); | |
- | |
- bool nontrivial_range = false; | |
- for (long t = 0; t < n_threads; ++t) | |
- if (ranges[t].first != ranges[t].second) | |
- nontrivial_range = true; | |
- | |
- if (nontrivial_range) { | |
- multifile_bit_stream_reader *gt_reader = | |
- new multifile_bit_stream_reader(tail_gt_begin_reversed); | |
- | |
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE | |
- typedef approx_rank<1L> rank_type; | |
- typedef sparse_isa<rank_type, saidx_t, 1L> isa_type; | |
-#else | |
- typedef approx_rank<8L> rank_type; | |
- typedef sparse_isa<rank_type, saidx_t, 8L> isa_type; | |
-#endif | |
- rank_type *pbwt_rank = new rank_type(block_pbwt, block_length, max_threads); | |
- isa_type *block_sparse_isa = new isa_type(block_psa, block, block_length, i0, pbwt_rank, max_threads); | |
- | |
- long prev_rank = initial_rank_after_tail; | |
- for (long t = n_threads - 1; t >= 0; --t) { | |
- long stream_block_beg = block_end + t * stream_max_block_size; | |
- long stream_block_end = std::min(stream_block_beg + stream_max_block_size, tail_end); | |
- long stream_block_size = stream_block_end - stream_block_beg; | |
- | |
- long left = ranges[t].first; | |
- long right = ranges[t].second; | |
- | |
- while (left != right) { | |
- // Valid values for mid are in [left..right). | |
- long mid = (left + right) / 2; | |
- | |
- if ((long)block_psa[mid] + stream_block_size >= block_length) { | |
- if (gt_reader->access(text_length - (stream_block_beg + (block_length - (long)block_psa[mid])))) left = mid + 1; | |
- else right = mid; | |
- } else { | |
- long j = (long)block_psa[mid] + stream_block_size; | |
- if (block_sparse_isa->query(j) < prev_rank) left = mid + 1; | |
- else right = mid; | |
- } | |
- } | |
- | |
- result[t] = left; | |
- prev_rank = result[t]; | |
- } | |
- | |
- delete pbwt_rank; | |
- delete block_sparse_isa; | |
- delete gt_reader; | |
- } else { | |
- for (long t = 0; t < n_threads; ++t) | |
- result[t] = ranges[t].first; | |
- } | |
-} | |
- | |
-int lcp_compare_2( | |
- const unsigned char *text, // only text[block_suf_beg..block_end) will be accessed | |
- long text_length, | |
- long block_end, // wrt to text beg | |
- long block_suf_beg, // wrt to text beg | |
- const unsigned char *pat, // only pat[lcp..pat_length) will be accessed | |
- long pat_beg, // wrt to text beg | |
- long pat_length, | |
- long tail_begin, // wrt to text beg | |
- background_block_reader *mid_block_reader, | |
- multifile_bit_stream_reader >_reader, | |
- long &lcp) { | |
- while (block_suf_beg + lcp < block_end && lcp < pat_length && | |
- text[block_suf_beg + lcp] == pat[lcp]) ++lcp; | |
- if (block_suf_beg + lcp < block_end && lcp < pat_length) { | |
- if (pat[lcp] > text[block_suf_beg + lcp]) return 1; | |
- else return -1; | |
- } | |
- | |
- if (block_suf_beg + lcp >= block_end && block_end < tail_begin && lcp < pat_length) { | |
- // To finish the comparison, we need to access symbols from the mid block. | |
- // First, wait until enough symbols are available. | |
- mid_block_reader->wait(std::min(tail_begin, block_suf_beg + pat_length) - block_end); | |
- | |
- // Now continue the comparison. | |
- const unsigned char *text2 = mid_block_reader->m_data - block_end; | |
- while (block_suf_beg + lcp < tail_begin && lcp < pat_length && | |
- text2[block_suf_beg + lcp] == pat[lcp]) ++lcp; | |
- if (block_suf_beg + lcp < tail_begin && lcp < pat_length) { | |
- if (pat[lcp] > text2[block_suf_beg + lcp]) return 1; | |
- else return -1; | |
- } | |
- } | |
- | |
- if (block_suf_beg + lcp >= tail_begin) { | |
- // Use gt to resolve comparison. | |
- if (gt_reader.access(text_length - (pat_beg + (tail_begin - block_suf_beg)))) return 1; | |
- else return -1; | |
- } else { // lcp == pat_length | |
- if (pat_beg + pat_length >= text_length) return -1; | |
- else return 0; | |
- } | |
-} | |
- | |
-template<typename saidx_t> | |
-void refine_range_2( | |
- const unsigned char *block, | |
- const saidx_t *block_psa, | |
- long block_beg, // wrt to text beg | |
- long block_end, // same here | |
- long pat_beg, // same here | |
- long tail_begin, | |
- background_block_reader *mid_block_reader, | |
- long text_length, | |
- long left, | |
- long right, | |
- long old_lcp, | |
- long new_lcp, | |
- const unsigned char *pat, // only pat[old_lcp..new_lcp) can and will be accessed | |
- multifile_bit_stream_reader >_reader, | |
- long &newleft, | |
- long &newright) { | |
- long low = left - 1; | |
- long high = right; | |
- long llcp = old_lcp; | |
- long rlcp = old_lcp; | |
- | |
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE | |
- long min_discrepancy = utils::random_long(0L, 10L); | |
- long balancing_factor = utils::random_long(1L, 10L); | |
-#else | |
- static const long min_discrepancy = (1L << 16); | |
- static const long balancing_factor = 64L; | |
-#endif | |
- | |
- const unsigned char *text = block - block_beg; | |
- while (low + 1 != high) { | |
- // Invariant: newleft is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare_2(text, text_length, block_end, block_beg + (long)block_psa[mid], | |
- pat, pat_beg, new_lcp, tail_begin, mid_block_reader, gt_reader, lcp) <= 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- newleft = high; | |
- | |
- if (rlcp >= new_lcp) { | |
- high = right; | |
- rlcp = old_lcp; | |
- | |
- while (low + 1 != high) { | |
- // Invariant: newright is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare_2(text, text_length, block_end, block_beg + (long)block_psa[mid], | |
- pat, pat_beg, new_lcp, tail_begin, mid_block_reader, gt_reader, lcp) < 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- } | |
- newright = high; | |
-} | |
- | |
-template<typename saidx_t> | |
-void em_compute_single_initial_rank_2( | |
- const unsigned char *block, | |
- const saidx_t *block_psa, | |
- long block_beg, // wrt to text beg | |
- long block_end, // same here | |
- long pat_beg, // same here | |
- long text_length, | |
- long max_lcp, | |
- long tail_begin, | |
- background_block_reader *mid_block_reader, | |
- std::string text_filename, | |
- const multifile *tail_gt_begin_reversed, | |
- long &result) { | |
- if (pat_beg == text_length) { | |
- result = 0; | |
- return; | |
- } | |
- | |
- long block_size = block_end - block_beg; | |
- long pat_end = std::min(text_length, pat_beg + max_lcp); | |
- | |
- multifile_bit_stream_reader gt_reader(tail_gt_begin_reversed); | |
- | |
- // Reads text[pat_beg..pat_end) in chunks. | |
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE | |
- long chunk_length = utils::random_long(1L, 10L); | |
- background_chunk_reader *chunk_reader = | |
- new background_chunk_reader(text_filename, pat_beg, pat_end, chunk_length); | |
-#else | |
- background_chunk_reader *chunk_reader = | |
- new background_chunk_reader(text_filename, pat_beg, pat_end); | |
-#endif | |
- | |
- // The current range is [left, right). | |
- long left = 0; | |
- long right = block_size; | |
- long lcp = 0; | |
- | |
- while (left != right && lcp < max_lcp) { | |
- long this_chunk_length = std::min(max_lcp - lcp, chunk_reader->get_chunk_size()); | |
- long new_lcp = lcp + this_chunk_length; | |
- chunk_reader->wait(pat_beg + new_lcp); | |
- | |
- // Invariant: | |
- // reader->chunk[0..chunk_length) = pattern[lcp..new_lcp). | |
- long newleft = 0; | |
- long newright = 0; | |
- refine_range_2(block, block_psa, block_beg, block_end, pat_beg, tail_begin, | |
- mid_block_reader, text_length, left, right, lcp, new_lcp, | |
- chunk_reader->m_chunk - lcp, gt_reader, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- lcp = new_lcp; | |
- } | |
- result = left; | |
- | |
- delete chunk_reader; | |
-} | |
- | |
-template<typename saidx_t> | |
-void em_compute_initial_ranks( | |
- const unsigned char *block, | |
- const saidx_t *block_psa, | |
- long block_beg, // wrt to text beg | |
- long block_end, // same here | |
- long text_length, | |
- std::string text_filename, | |
- const multifile *tail_gt_begin_reversed, | |
- std::vector<long> &result, | |
- long max_threads, | |
- long tail_begin) { | |
- // Compute some initial parameters. | |
- long block_length = block_end - block_beg; | |
- long tail_length = text_length - tail_begin; | |
- long mid_block_beg = block_end; | |
- long mid_block_end = tail_begin; | |
- long mid_block_size = mid_block_end - mid_block_beg; | |
- long stream_max_block_size = (tail_length + max_threads - 1) / max_threads; | |
- long n_threads = (tail_length + stream_max_block_size - 1) / stream_max_block_size; | |
- | |
- // Start reading the text between the block and the tail in the backgrond. | |
- background_block_reader *mid_block_reader = | |
- new background_block_reader(text_filename, mid_block_beg, mid_block_size); | |
- | |
- // Compute the initial ranks. | |
- std::vector<long> res(n_threads); | |
- std::thread **threads = new std::thread*[n_threads]; | |
- | |
- for (int t = 0; t < n_threads; ++t) { | |
- long stream_block_beg = tail_begin + t * stream_max_block_size; | |
- long max_lcp = std::min(block_length + mid_block_size, text_length - stream_block_beg); | |
- | |
- threads[t] = new std::thread(em_compute_single_initial_rank_2<saidx_t>, | |
- block, block_psa, block_beg, block_end, stream_block_beg, text_length, | |
- max_lcp, tail_begin, mid_block_reader, text_filename, | |
- tail_gt_begin_reversed, std::ref(res[t])); | |
- } | |
- | |
- for (int t = 0; t < n_threads; ++t) threads[t]->join(); | |
- for (int t = 0; t < n_threads; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- mid_block_reader->stop(); | |
- delete mid_block_reader; | |
- | |
- result = res; | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_EM_COMPUTE_INITIAL_RANKS_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/gap_array.h b/exttools/pSAscan-0.1.0/src/psascan_src/gap_array.h | |
deleted file mode 100644 | |
index 364fab2f..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/gap_array.h | |
+++ /dev/null | |
@@ -1,535 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/gap_array.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_GAP_ARRAY_H_INCLUDED | |
-#define __PSASCAN_SRC_GAP_ARRAY_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstring> | |
-#include <vector> | |
-#include <mutex> | |
-#include <string> | |
-#include <thread> | |
-#include <algorithm> | |
-#include <parallel/algorithm> | |
- | |
-#include "utils.h" | |
-#include "bitvector.h" | |
-#include "parallel_utils.h" | |
-#include "async_stream_writer.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct buffered_gap_array { | |
- buffered_gap_array(long length, std::string storage_fname = std::string("")) { | |
- if (length <= 0L) { | |
- fprintf(stderr, "\nError: attempting to construct empty gap array.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- m_length = length; | |
- m_count = (unsigned char *)malloc(m_length); | |
- std::fill(m_count, m_count + m_length, 0); | |
- | |
- m_excess = new long[k_excess_limit]; | |
- | |
- // File used to store excess values. | |
- m_storage_filename = storage_fname; | |
- if (!m_storage_filename.length()) | |
- m_storage_filename = ".excess." + utils::random_string_hash(); | |
- | |
- m_excess_filled = 0L; | |
- m_excess_disk = 0L; | |
- m_sorted_excess = NULL; | |
- m_sequential_read_initialized = false; | |
- } | |
- | |
- void add_excess(long x) { | |
- m_excess[m_excess_filled++] = x; | |
- if (m_excess_filled == k_excess_limit) { | |
- m_gap_writing_mutex.lock(); | |
- m_excess_disk += m_excess_filled; | |
- utils::add_objects_to_file(m_excess, m_excess_filled, m_storage_filename); | |
- m_excess_filled = 0L; | |
- m_gap_writing_mutex.unlock(); | |
- } | |
- } | |
- | |
- void flush_excess_to_disk() { | |
- if (m_excess_filled > 0) { | |
- utils::add_objects_to_file(m_excess, m_excess_filled, m_storage_filename); | |
- m_excess_disk += m_excess_filled; | |
- m_excess_filled = 0L; | |
- } | |
- } | |
- | |
- void start_sequential_access() { | |
- if (!m_sequential_read_initialized) { | |
- m_sequential_read_initialized = true; | |
- m_total_excess = m_excess_filled + m_excess_disk; | |
- m_sorted_excess = (long *)malloc(m_total_excess * sizeof(long)); | |
- std::copy(m_excess, m_excess + m_excess_filled, m_sorted_excess); | |
- if (m_excess_disk > 0L) { | |
- long *dest = m_sorted_excess + m_excess_filled; | |
- long toread = m_excess_disk; | |
- utils::read_n_objects_from_file(dest, toread, m_storage_filename.c_str()); | |
- } | |
- std::sort(m_sorted_excess, m_sorted_excess + m_total_excess); | |
- } | |
- | |
- m_excess_ptr = 0; | |
- m_current_pos = 0; | |
- } | |
- | |
- inline long get_next() { | |
- long c = 0; | |
- while (m_excess_ptr < m_total_excess && m_sorted_excess[m_excess_ptr] == m_current_pos) | |
- ++m_excess_ptr, ++c; | |
- long result = c * 256L + m_count[m_current_pos]; | |
- | |
- ++m_current_pos; | |
- return result; | |
- } | |
- | |
- void stop_sequential_access() { | |
- if (m_sequential_read_initialized) { | |
- free(m_sorted_excess); | |
- m_sequential_read_initialized = false; | |
- } else { | |
- fprintf(stderr, "\nError: attempting to stop sequential " | |
- "access to the gap array before it was initialized.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- std::mutex m_excess_mutex; | |
- std::mutex m_gap_writing_mutex; | |
- | |
- ~buffered_gap_array() { | |
- if (m_sequential_read_initialized) { | |
- fprintf(stderr, "\nError: sequential access to gap was not terminated."); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- free(m_count); | |
- delete[] m_excess; | |
- } | |
- | |
- void erase_disk_excess() { | |
- if (utils::file_exists(m_storage_filename)) | |
- utils::file_delete(m_storage_filename); | |
- } | |
- | |
- // Write to a given file using v-byte encoding. | |
- void save_to_file(std::string fname) { | |
- fprintf(stderr, " Write gap to file: "); | |
- long double gap_write_start = utils::wclock(); | |
- long bytes_written = 0L; | |
- | |
- start_sequential_access(); | |
- typedef async_stream_writer<unsigned char> stream_writer_type; | |
- stream_writer_type *writer = new stream_writer_type(fname); | |
- | |
- for (long j = 0; j < m_length; ++j) { | |
- long val = get_next(); | |
- while (val > 127) { | |
- writer->write((val & 0x7f) | 0x80); | |
- val >>= 7; | |
- ++bytes_written; | |
- } | |
- writer->write(val); | |
- } | |
- | |
- bytes_written += m_length; | |
- stop_sequential_access(); | |
- delete writer; | |
- | |
- long double gap_write_time = utils::wclock() - gap_write_start; | |
- long double io_speed = (bytes_written / (1024.L * 1024)) / gap_write_time; | |
- fprintf(stderr, "%.2Lf (%.2LfMiB/s)\n", gap_write_time, io_speed); | |
- } | |
- | |
- | |
- //============================================================================== | |
- // Note about the input: | |
- // - j is the maximal integer such that gapsum[j] + j <= beg. | |
- // - S contains value gapsum[j] + j. | |
- //============================================================================== | |
- static void convert_gap_to_bitvector_aux(long beg, long end, long j, long S, buffered_gap_array *gap, bitvector *bv) { | |
- // Initialize pointer to sorted excess values. | |
- long excess_pointer = std::lower_bound(gap->m_sorted_excess, | |
- gap->m_sorted_excess + gap->m_total_excess, j) - gap->m_sorted_excess; | |
- | |
- // Compute gap[j]. | |
- long gap_j = gap->m_count[j]; | |
- while (excess_pointer < gap->m_total_excess && gap->m_sorted_excess[excess_pointer] == j) { | |
- gap_j += 256L; | |
- ++excess_pointer; | |
- } | |
- | |
- long p = beg; | |
- long ones = std::min(end - p, gap_j - (beg - S)); | |
- for (long k = 0; k < ones; ++k) bv->set(p++); | |
- ++j; | |
- | |
- while (p < end) { | |
- ++p; | |
- | |
- // Compute gap[j]. | |
- gap_j = gap->m_count[j]; | |
- while (excess_pointer < gap->m_total_excess && gap->m_sorted_excess[excess_pointer] == j) { | |
- gap_j += 256L; | |
- ++excess_pointer; | |
- } | |
- | |
- ones = std::min(end - p, gap_j); | |
- | |
- for (long k = 0; k < ones; ++k) bv->set(p++); | |
- ++j; | |
- } | |
- } | |
- | |
- static void compute_j_aux(long range_beg, long n_chunks, long max_chunk_size, | |
- const long *sparse_gapsum, long &initial_gap_ptr, long &initial_gapsum_value, const buffered_gap_array *gap) { | |
- // Fast forward through as many chunks as possible. | |
- long j = 0L; | |
- long gapsum_j = 0L; // At any time gapsum_j = gap[0] + .. + gap[j - 1]. | |
- while (j + 1 < n_chunks && sparse_gapsum[j + 1] + (max_chunk_size * (j + 1)) <= range_beg) ++j; | |
- gapsum_j = sparse_gapsum[j]; | |
- j = (j * max_chunk_size); | |
- | |
- // Slowly find the right place in a single chunk. | |
- long excess_ptr = std::lower_bound(gap->m_sorted_excess, gap->m_sorted_excess + gap->m_total_excess, j) - gap->m_sorted_excess; | |
- while (j < gap->m_length) { | |
- long gap_j = gap->m_count[j]; | |
- while (excess_ptr < gap->m_total_excess && gap->m_sorted_excess[excess_ptr] == j) { | |
- gap_j += 256L; | |
- ++excess_ptr; | |
- } | |
- | |
- if (gapsum_j + gap_j + j + 1 <= range_beg) { | |
- gapsum_j += gap_j; | |
- ++j; | |
- } else break; | |
- } | |
- | |
- // Store the answer. | |
- initial_gap_ptr = j; | |
- initial_gapsum_value = gapsum_j + j; | |
- } | |
- | |
- | |
- static void compute_gapsum_for_chunk_group(long group_beg, long group_end, long max_chunk_size, | |
- long *sparse_gapsum, const buffered_gap_array *gap) { | |
- for (long chunk_id = group_beg; chunk_id < group_end; ++chunk_id) { | |
- long chunk_beg = chunk_id * max_chunk_size; | |
- long chunk_end = std::min(chunk_beg + max_chunk_size, gap->m_length); | |
- | |
- // Compute sum of gap values inside chunk. We assume that | |
- // the excess values are in RAM and were sorted. | |
- long occ = std::upper_bound(gap->m_sorted_excess, gap->m_sorted_excess + gap->m_total_excess, chunk_end - 1) | |
- - std::lower_bound(gap->m_sorted_excess, gap->m_sorted_excess + gap->m_total_excess, chunk_beg); | |
- long gap_sum_inside_chunk = 256L * std::max(0L, occ); | |
- for (long j = chunk_beg; j < chunk_end; ++j) | |
- gap_sum_inside_chunk += gap->m_count[j]; | |
- | |
- // Store the result. | |
- sparse_gapsum[chunk_id] = gap_sum_inside_chunk; | |
- } | |
- } | |
- | |
- bitvector* convert_to_bitvector(long max_threads) { | |
- // 1 | |
- // | |
- // The term chunks is used to compute sparse gapsum array. | |
- // Chunk is a length such that | |
- // gapsum[k] = gap[0] + gap[1] + .. + gap[k * max_chunk_size - 1] | |
- long max_chunk_size = std::min(4L << 20, (m_length + max_threads - 1) / max_threads); | |
- long n_chunks = (m_length + max_chunk_size - 1) / max_chunk_size; | |
- long *sparse_gapsum = (long *)malloc(n_chunks * sizeof(long)); | |
- | |
- | |
- // 2 | |
- // | |
- // Compute the sum of gap value inside each chunk. Since there can be | |
- // more chunks than threads, we split chunks into groups and let each | |
- // thread compute the sum of gap values inside the group of chunks. | |
- long chunk_group_size = (n_chunks + max_threads - 1) / max_threads; | |
- long n_chunk_groups = (n_chunks + chunk_group_size - 1) / chunk_group_size; | |
- | |
- start_sequential_access(); | |
- std::thread **threads = new std::thread*[n_chunk_groups]; | |
- for (long t = 0; t < n_chunk_groups; ++t) { | |
- long chunk_group_beg = t * chunk_group_size; | |
- long chunk_group_end = std::min(chunk_group_beg + chunk_group_size, n_chunks); | |
- | |
- threads[t] = new std::thread(compute_gapsum_for_chunk_group, chunk_group_beg, | |
- chunk_group_end, max_chunk_size, sparse_gapsum, this); | |
- } | |
- | |
- for (long t = 0; t < n_chunk_groups; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_chunk_groups; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- | |
- // 3 | |
- // | |
- // Compute comulative sum over sparse_gapsum array. | |
- long double gap_total_sum = 0L; | |
- for (long i = 0L; i < n_chunks; ++i) { | |
- long temp = sparse_gapsum[i]; | |
- sparse_gapsum[i] = gap_total_sum; | |
- gap_total_sum += temp; | |
- } | |
- | |
- | |
- // 4 | |
- // | |
- // Compute all initial gap pointers. For a thread handling range [beg..end), the | |
- // initial_gap_ptr values is the largest j, such that gapsum[j] + j <= beg. | |
- // After we find j, we store the value of gapsum[j] + j in initial_gapsum_value. | |
- long result_length = (m_length + gap_total_sum) - 1; | |
- bitvector *result = new bitvector(result_length + 1); // +1 is to make room for sentinel | |
- | |
- long max_range_size = (result_length + max_threads - 1) / max_threads; | |
- while (max_range_size & 7) ++max_range_size; | |
- long n_ranges = (result_length + max_range_size - 1) / max_range_size; | |
- | |
- long *initial_gap_ptr = new long[n_ranges]; | |
- long *initial_gapsum_value = new long[n_ranges]; | |
- | |
- threads = new std::thread*[n_ranges]; | |
- for (long t = 0; t < n_ranges; ++t) { | |
- long range_beg = t * max_range_size; | |
- threads[t] = new std::thread(compute_j_aux, range_beg, n_chunks, max_chunk_size, | |
- sparse_gapsum, std::ref(initial_gap_ptr[t]), std::ref(initial_gapsum_value[t]), this); | |
- } | |
- for (long t = 0; t < n_ranges; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_ranges; ++t) delete threads[t]; | |
- | |
- | |
- // 5 | |
- // | |
- // Compute the bitvector. Each thread fills in the range of bits. | |
- for (long t = 0; t < n_ranges; ++t) { | |
- long range_beg = t * max_range_size; | |
- long range_end = std::min(range_beg + max_range_size, result_length); | |
- | |
- threads[t] = new std::thread(convert_gap_to_bitvector_aux, range_beg, | |
- range_end, initial_gap_ptr[t], initial_gapsum_value[t], this, result); | |
- } | |
- | |
- for (long t = 0; t < n_ranges; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_ranges; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- delete[] initial_gap_ptr; | |
- delete[] initial_gapsum_value; | |
- stop_sequential_access(); | |
- free(sparse_gapsum); | |
- | |
- return result; | |
- } | |
- | |
- static const long k_excess_limit = (1L << 22); | |
- | |
- unsigned char *m_count; | |
- long m_length; | |
- long m_excess_filled; | |
- long m_excess_disk; | |
- long *m_excess; | |
- | |
- std::string m_storage_filename; | |
- | |
- bool m_sequential_read_initialized; | |
- long m_excess_ptr; | |
- long m_current_pos; | |
- | |
-public: | |
- long *m_sorted_excess; | |
- long m_total_excess; | |
-}; | |
- | |
- | |
-struct gap_array_2n { | |
- gap_array_2n(const buffered_gap_array *gap, long max_threads) { | |
- m_length = gap->m_length; | |
- m_count = (uint16_t *)malloc(m_length * sizeof(uint16_t)); | |
- parallel_utils::parallel_copy<unsigned char, uint16_t>(gap->m_count, m_count, m_length, max_threads); | |
- m_storage_filename = gap->m_storage_filename; | |
- m_excess_disk = gap->m_excess_disk; | |
- } | |
- | |
- gap_array_2n(long length) { | |
- m_length = length; | |
- m_count = (uint16_t *)malloc(m_length * sizeof(uint16_t)); | |
- } | |
- | |
- ~gap_array_2n() { | |
- if (m_count) | |
- free(m_count); | |
- } | |
- | |
- static void apply_excess_aux(gap_array_2n *gap, const long *tab, | |
- long block_beg, long block_end, uint64_t &initial_run_length) { | |
- long block_size = block_end - block_beg; | |
- | |
- // Each thread gathers excess values in a buffer and at the end | |
- // copies then to the gap array's mutex-protected m_excess vector. | |
- std::vector<long> excess_buffer; | |
- | |
- // Compute the length of initial run. | |
- initial_run_length = 1UL; | |
- while (initial_run_length < (uint64_t)block_size && tab[block_beg] == | |
- tab[block_beg + initial_run_length]) ++initial_run_length; | |
- | |
- // Update count values. | |
- for (long i = block_beg + initial_run_length; i < block_end; ++i) { | |
- long x = tab[i]; | |
- uint64_t value = (uint64_t)gap->m_count[x] + 256UL; | |
- if (value >= (1UL << 16)) { | |
- value -= (1UL << 16); | |
- excess_buffer.push_back(x); | |
- } | |
- gap->m_count[x] = value; | |
- } | |
- | |
- // Copy the excess values to the gap array's mutex-protected vector. | |
- std::unique_lock<std::mutex> lk(gap->m_excess_mutex); | |
- for (long i = 0; i < (long)excess_buffer.size(); ++i) | |
- gap->m_excess.push_back(excess_buffer[i]); | |
- lk.unlock(); | |
- } | |
- | |
- void apply_excess_from_disk(long ram_budget, long max_threads) { | |
- if (!m_excess_disk) return; | |
- | |
- // We only use half of the RAM for buffer, because we will use parallel | |
- // merge sort for sorting the buffer (which requires double the space | |
- // for the input). | |
- long elems = std::max(1L, ram_budget / (2L * (long)sizeof(long))); | |
- long *buffer = (long *)malloc(elems * sizeof(long)); | |
- | |
- std::FILE *f = utils::open_file(m_storage_filename.c_str(), "r"); | |
- std::thread **threads = new std::thread*[max_threads]; | |
- | |
- // After sorting the buffer, when we split it equally between threads | |
- // we obey the rule, the every thread only counts the number of | |
- // elements equal to the first element in the handled range, but does | |
- // not do any updates for these elements. This prevents two threads | |
- // trying to update the same elements in the m_count array. The | |
- // length of the first run is computed and returned by each thread. | |
- // It is then updated sequentially. | |
- uint64_t *first_run_length = new uint64_t[max_threads]; | |
- | |
- while (m_excess_disk > 0) { | |
- // Read a portion of excess values from disk. | |
- long toread = std::min(m_excess_disk, elems); | |
- utils::read_n_objects_from_file(buffer, toread, f); | |
- | |
- // Sort excess values in parallel. | |
- __gnu_parallel::sort(buffer, buffer + toread); | |
- | |
- // Update m_count and m_excess with elements from the buffer. | |
- // The buffer is dividied into blocks, each blocks handles one | |
- // block. Each thread updates the values except the first run | |
- // of the block, which is handled separatelly (sequentially). | |
- long max_block_size = (toread + max_threads - 1) / max_threads; | |
- long n_blocks = (toread + max_block_size - 1) / max_block_size; | |
- | |
- for (long t = 0; t < n_blocks; ++t) { | |
- long block_beg = t * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, toread); | |
- | |
- threads[t] = new std::thread(apply_excess_aux, this, buffer, | |
- block_beg, block_end, std::ref(first_run_length[t])); | |
- } | |
- | |
- for (long t = 0; t < n_blocks; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_blocks; ++t) delete threads[t]; | |
- | |
- // Sequentially handle the elements in the first run of each block. | |
- for (long t = 0; t < n_blocks; ++t) { | |
- long block_beg = t * max_block_size; | |
- long first = buffer[block_beg]; // first elements in the block | |
- | |
- uint64_t freq = (uint64_t)m_count[first] + (first_run_length[t] * 256L); | |
- while (freq >= (1UL << 16)) { | |
- freq -= (1UL << 16); | |
- m_excess.push_back(first); | |
- } | |
- m_count[first] = freq; | |
- } | |
- | |
- m_excess_disk -= toread; | |
- } | |
- | |
- __gnu_parallel::sort(m_excess.begin(), m_excess.end()); | |
- | |
- delete[] threads; | |
- delete[] first_run_length; | |
- | |
- std::fclose(f); | |
- free(buffer); | |
- } | |
- | |
- void set_count(long pos, long value) { | |
- while (value >= (1L << 16)) { | |
- m_excess.push_back(pos); | |
- value -= (1L << 16); | |
- } | |
- m_count[pos] = (uint64_t)value; | |
- } | |
- | |
- void erase_disk_excess() { | |
- if (utils::file_exists(m_storage_filename)) | |
- utils::file_delete(m_storage_filename); | |
- } | |
- | |
- uint16_t *m_count; | |
- | |
- long m_length; | |
- long m_excess_disk; | |
- | |
- std::mutex m_excess_mutex; | |
- std::string m_storage_filename; | |
- std::vector<long> m_excess; // all excess values are in RAM | |
-}; | |
- | |
-} // psascan_private | |
- | |
-#endif // __PSASCAN_SRC_GAP_ARRAY_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/gap_buffer.h b/exttools/pSAscan-0.1.0/src/psascan_src/gap_buffer.h | |
deleted file mode 100644 | |
index 053ff715..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/gap_buffer.h | |
+++ /dev/null | |
@@ -1,122 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/gap_buffer.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_GAP_BUFFER_H_INCLUDED | |
-#define __PSASCAN_SRC_GAP_BUFFER_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <queue> | |
-#include <mutex> | |
-#include <condition_variable> | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template<typename value_type> | |
-struct gap_buffer { | |
- gap_buffer(long size_bytes, long n_increasers) | |
- : m_filled(0L), | |
- m_size(size_bytes / sizeof(value_type)) { | |
- m_content = new value_type[m_size]; | |
- | |
- sblock_size = new long[n_increasers]; | |
- sblock_beg = new long[n_increasers]; | |
- } | |
- | |
- ~gap_buffer() { | |
- delete[] m_content; | |
- delete[] sblock_size; | |
- delete[] sblock_beg; | |
- } | |
- | |
- long m_filled, m_size; | |
- value_type *m_content; | |
- | |
- long *sblock_size; | |
- long *sblock_beg; | |
-}; | |
- | |
-// Same class for the poll of empty and full gap buffers. | |
-template<typename value_type> | |
-struct gap_buffer_poll { | |
- typedef gap_buffer<value_type> gap_buffer_type; | |
- | |
- gap_buffer_poll(long worker_threads = 0L) { | |
- m_worker_threads = worker_threads; // unused for the poll of empty buffers. | |
- m_worker_threads_finished = 0L; | |
- } | |
- | |
- void add(gap_buffer_type *b) { | |
- m_queue.push(b); | |
- } | |
- | |
- bool available() const { | |
- return m_queue.size() > 0; | |
- } | |
- | |
- gap_buffer_type *get() { | |
- if (m_queue.empty()) { | |
- fprintf(stderr, "\nError: requesting a gap buffer from empty poll!\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- gap_buffer_type *ret = m_queue.front(); | |
- m_queue.pop(); | |
- | |
- return ret; | |
- } | |
- | |
- bool finished() const { | |
- return m_worker_threads_finished == m_worker_threads; | |
- } | |
- | |
- void increment_finished_workers() { | |
- ++m_worker_threads_finished; | |
- } | |
- | |
- std::condition_variable m_cv; | |
- std::mutex m_mutex; | |
- | |
-private: | |
- long m_worker_threads; | |
- long m_worker_threads_finished; // to detect when all threads finished | |
- | |
- std::queue<gap_buffer_type*> m_queue; | |
-}; | |
- | |
-} // psascan_private | |
- | |
-#endif // __PSASCAN_SRC_GAP_BUFFER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/half_block_info.h b/exttools/pSAscan-0.1.0/src/psascan_src/half_block_info.h | |
deleted file mode 100644 | |
index 6ab7842e..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/half_block_info.h | |
+++ /dev/null | |
@@ -1,62 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/half_block_info.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_HALF_BLOCK_INFO_H_INCLUDED | |
-#define __PSASCAN_SRC_HALF_BLOCK_INFO_H_INCLUDED | |
- | |
-#include <string> | |
- | |
-#include "distributed_file.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-// Stores the information about half-blocks. | |
-template<typename block_offset_type> | |
-struct half_block_info { | |
- long beg; | |
- long end; | |
- | |
- std::string gap_filename; | |
- distributed_file<block_offset_type> *psa; | |
- | |
- bool operator < (const half_block_info &i) const { | |
- return beg < i.beg; | |
- } | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_HALF_BLOCK_INFO_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/bwtsa.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/bwtsa.h | |
deleted file mode 100644 | |
index 7ef48c1f..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/bwtsa.h | |
+++ /dev/null | |
@@ -1,74 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/bwtsa.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_BWTSA_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_BWTSA_H_INCLUDED | |
- | |
-#include "../uint40.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename sa_type> | |
-struct bwtsa_t { | |
- sa_type sa; | |
- unsigned char bwt; | |
- | |
- inline operator sa_type() const { | |
- return sa; | |
- } | |
- | |
- bwtsa_t() { | |
- } | |
- | |
- bwtsa_t(long x) { | |
- sa = (sa_type)x; | |
- } | |
- | |
- bwtsa_t(int x) { | |
- sa = (sa_type)x; | |
- } | |
- | |
- bwtsa_t(uint40 x) { | |
- sa = (sa_type)x; | |
- } | |
- | |
-} __attribute__((packed)); | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_BWTSA_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/change_gt_reference_point.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/change_gt_reference_point.h | |
deleted file mode 100644 | |
index bf13cde7..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/change_gt_reference_point.h | |
+++ /dev/null | |
@@ -1,158 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/change_gt_reference_point.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * In-place computation of gt_begin bitvector from gt_end bitvector | |
- * (reversed). The procedure uses the string range matching algorithm | |
- * described in | |
- * | |
- * Juha Karkkainen, Dominik Kempa, Simon J. Puglisi: | |
- * String Range Matching. | |
- * In Proc. CPM 2014, p. 232-241. | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_CHANGE_GT_REFERENCE_POINT_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_CHANGE_GT_REFERENCE_POINT_H_INCLUDED | |
- | |
-#include <cstring> | |
-#include <algorithm> | |
-#include <thread> | |
- | |
-#include "../bitvector.h" | |
-#include "srank_aux.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-//============================================================================== | |
-// Compute range [microblock_beg..microblock_end) of bits in the output | |
-// bitvector gt_out. | |
-//============================================================================== | |
-void gt_end_to_gt_begin_aux(const unsigned char *text, long text_length, | |
- long block_beg, long block_end, bitvector *gt) { | |
- long block_size = block_end - block_beg; | |
- const unsigned char *pat = text + block_beg, *txt = pat; | |
- | |
- long i = 1, el = 0L, s = 0L, p = 0L; | |
- long i_max = i, el_max = 0L, s_max = 0L, p_max = 0L; | |
- | |
- long rev_end = text_length - block_beg; | |
- while (i < block_size) { | |
- // Compute lcp(text[left_block_beg..), text[left_block_beg+i..), | |
- // but compare not more than left_block_size symbols (we have gt | |
- // to resolve the long comparisons). | |
- while (block_beg + i + el < block_end && txt[i + el] == pat[el]) | |
- update_ms(pat, ++el, s, p); | |
- | |
- if (((block_beg + i + el != block_end && txt[i + el] > pat[el]) || | |
- (block_beg + i + el == block_end && !gt->get(rev_end - i)))) | |
- gt->set(rev_end - i); | |
- else gt->reset(rev_end - i); | |
- | |
- long j = i_max; | |
- if (el > el_max) { | |
- std::swap(el, el_max); | |
- std::swap(s, s_max); | |
- std::swap(p, p_max); | |
- i_max = i; | |
- } | |
- | |
- if (el < 100) { | |
- ++i; | |
- el = 0; | |
- } else if (p > 0L && (p << 2) <= el && !memcmp(pat, pat + p, s)) { | |
- long maxk = std::min(block_size - i, p); | |
- for (long k = 1L; k < maxk; ++k) { | |
- if (gt->get(rev_end - (j + k))) gt->set(rev_end - (i + k)); | |
- else gt->reset(rev_end - (i + k)); | |
- } | |
- | |
- i += p; | |
- el -= p; | |
- } else { | |
- long h = (el >> 2) + 1L; | |
- long maxk = std::min(h, block_size - i); | |
- for (long k = 1L; k < maxk; ++k) { | |
- if (gt->get(rev_end - (j + k))) gt->set(rev_end - (i + k)); | |
- else gt->reset(rev_end - (i + k)); | |
- } | |
- | |
- i += h; | |
- el = 0; | |
- s = 0; | |
- p = 0; | |
- } | |
- } | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Change gt_end bitvector into gt_begin using string range matching. | |
-//============================================================================== | |
-void gt_end_to_gt_begin(const unsigned char *text, long text_length, | |
- bitvector *gt, long max_block_size) { | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Compute the last bit in every block. | |
- //---------------------------------------------------------------------------- | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long rev_beg = text_length - block_end; | |
- gt->flip(rev_beg); | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: compute remaining bits in every block. | |
- //---------------------------------------------------------------------------- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- | |
- threads[i] = new std::thread(gt_end_to_gt_begin_aux, | |
- text, text_length, block_beg, block_end, gt); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_CHANGE_GT_REFERENCE_POINT_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h | |
deleted file mode 100644 | |
index 54ca636a..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h | |
+++ /dev/null | |
@@ -1,361 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * Parallel computation of gt_end bitvectors. The procedure uses the | |
- * string range matching algorithm described in | |
- * | |
- * Juha Karkkainen, Dominik Kempa, Simon J. Puglisi: | |
- * String Range Matching. | |
- * In Proc. CPM 2014, p. 232-241. | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_COMPUTE_INITIAL_GT_BITVECTORS_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_COMPUTE_INITIAL_GT_BITVECTORS_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstring> | |
-#include <algorithm> | |
-#include <thread> | |
- | |
-#include "../bitvector.h" | |
-#include "../multifile.h" | |
-#include "../multifile_bit_stream_reader.h" | |
-#include "../background_block_reader.h" | |
-#include "srank_aux.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-void compute_partial_gt_end(const unsigned char *text, long text_length, | |
- long begin, long end, long max_lcp, bitvector *gt, bitvector *undecided, | |
- bool &all_decided, long text_end, long supertext_length, | |
- const multifile *tail_gt_begin_rev, | |
- background_block_reader *tail_prefix_background_reader, | |
- const unsigned char *tail_prefix_preread) { | |
- bool res = true; | |
- all_decided = true; | |
- long revbeg = text_length - end; | |
- | |
- if (end == text_length) { | |
- // It's ok if tail_gt_begin_rev is NULL | |
- multifile_bit_stream_reader tail_gt_beg_rev(tail_gt_begin_rev); | |
- long tail_length = supertext_length - text_end; | |
- long range_size = end - begin; | |
- long tail_prefix_length = std::min(text_length, tail_length); | |
- long tail_prefix_fetched = 0; | |
- | |
- const unsigned char *txt = text + begin; | |
- const unsigned char *tail_prefix = NULL; | |
- | |
- if (tail_prefix_length > 0) { | |
- if (tail_prefix_preread != NULL) { | |
- // Whole tail prefix is already in memory. | |
- tail_prefix = tail_prefix_preread; | |
- tail_prefix_fetched = tail_prefix_length; | |
- } else { | |
- // Tail prefix will be fetched asynchronously in the background. | |
- tail_prefix = tail_prefix_background_reader->m_data; | |
- tail_prefix_fetched = 0; | |
- } | |
- } | |
- | |
- long i = 0, el = 0, s = 0, p = 0; | |
- long i_max = 0, el_max = 0, s_max = 0, p_max = 0; | |
- | |
- static const long chunk_size = (1L << 20); | |
- | |
- while (i < range_size) { | |
- while (i + el < range_size && el < tail_length) { | |
- if (el == tail_prefix_fetched) { | |
- long next_chunk = std::min(chunk_size, | |
- tail_prefix_length - tail_prefix_fetched); | |
- tail_prefix_fetched += next_chunk; | |
- tail_prefix_background_reader->wait(tail_prefix_fetched); | |
- } | |
- while (i + el < range_size && el < tail_length && | |
- el < tail_prefix_fetched && txt[i + el] == tail_prefix[el]) | |
- update_ms(tail_prefix, ++el, s, p); | |
- if (el < tail_prefix_fetched) break; | |
- } | |
- | |
- if ((el == tail_length) || | |
- (i + el == range_size && !tail_gt_beg_rev.access(tail_length - el)) || | |
- (i + el < range_size && txt[i + el] > tail_prefix[el])) | |
- gt->set(revbeg + i); | |
- | |
- long j = i_max; | |
- if (el > el_max) { | |
- std::swap(el, el_max); | |
- std::swap(s, s_max); | |
- std::swap(p, p_max); | |
- i_max = i; | |
- } | |
- | |
- if (el < 100) { | |
- ++i; | |
- el = 0; | |
- } else if (p > 0 && (p << 2) <= el && | |
- !memcmp(tail_prefix, tail_prefix + p, s)) { | |
- long maxk = std::min(p, range_size - i); | |
- for (long k = 1; k < maxk; ++k) | |
- if (gt->get(revbeg + j + k)) gt->set(revbeg + i + k); | |
- i += p; | |
- el -= p; | |
- } else { | |
- long h = (el >> 2) + 1; | |
- long maxk = std::min(h, range_size - i); | |
- for (long k = 1; k < maxk; ++k) | |
- if (gt->get(revbeg + j + k)) gt->set(revbeg + i + k); | |
- i += h; | |
- el = 0; | |
- p = 0; | |
- s = 0; | |
- } | |
- } | |
- } else { | |
- long i = 0, el = 0, s = 0, p = 0; | |
- long i_max = 0, el_max = 0, s_max = 0, p_max = 0; | |
- | |
- const unsigned char *txt = text + begin; | |
- const unsigned char *pat = text + end; | |
- long range_size = end - begin; | |
- | |
- while (i < range_size) { | |
- while (el < max_lcp && txt[i + el] == pat[el]) | |
- update_ms(pat, ++el, s, p); | |
- | |
- if (el < max_lcp) { | |
- if (txt[i + el] > pat[el]) gt->set(revbeg + i); | |
- } else { | |
- undecided->set(revbeg + i); | |
- res = false; | |
- } | |
- | |
- long j = i_max; | |
- if (el > el_max) { | |
- std::swap(el, el_max); | |
- std::swap(s, s_max); | |
- std::swap(p, p_max); | |
- i_max = i; | |
- } | |
- | |
- if (el < 100) { | |
- ++i; | |
- el = 0; | |
- } else if (p > 0 && (p << 2) <= el && !memcmp(pat, pat + p, s)) { | |
- long maxk = std::min(p, range_size - i); | |
- for (long k = 1; k < maxk; ++k) { | |
- if (undecided->get(revbeg + (j + k))) undecided->set(revbeg + (i + k)); | |
- if (gt->get(revbeg + (j + k))) gt->set(revbeg + (i + k)); | |
- } | |
- | |
- i += p; | |
- el -= p; | |
- } else { | |
- long h = (el >> 2) + 1; | |
- long maxk = std::min(h, range_size - i); | |
- for (long k = 1; k < maxk; ++k) { | |
- if (undecided->get(revbeg + (j + k))) undecided->set(revbeg + (i + k)); | |
- if (gt->get(revbeg + (j + k))) gt->set(revbeg + (i + k)); | |
- } | |
- | |
- i += h; | |
- el = 0; | |
- s = 0; | |
- p = 0; | |
- } | |
- } | |
- } | |
- | |
- all_decided = res; | |
-} | |
- | |
-//============================================================================== | |
-// Set all undecided bits inside the given microblock (that is, the range | |
-// [mb_beg..mb_end)) of all gt bitvectors to their correct values. | |
-//============================================================================== | |
-void compute_final_gt(long text_length, long max_block_size, long mb_beg, | |
- long mb_end, bitvector *gt, const bitvector *undecided, | |
- const bool *all_decided) { | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- | |
- // Go through blocks right to left. | |
- for (long t = n_blocks - 2; t >= 0; --t) { | |
- long block_end = text_length - (n_blocks - 1 - t) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long this_block_size = block_end - block_beg; | |
- long this_mb_beg = mb_beg; | |
- long this_mb_end = std::min(mb_end, this_block_size); | |
- | |
- long rev_beg = text_length - block_end; | |
- long rev_end = text_length - block_beg; | |
- | |
- if (!all_decided[t]) { | |
- // This eliminates the problem with accessing bits located in the same | |
- // byte in the bitvector. Skipped bits are later updated sequentially. | |
- while (((rev_end - 1 - this_mb_beg) & 7) != 7) ++this_mb_beg; | |
- for (long j = this_mb_beg; j < this_mb_end; ++j) | |
- if (undecided->get(rev_end - 1 - j) && gt->get(rev_beg - 1 - j)) | |
- gt->set(rev_end - 1 - j); | |
- } | |
- } | |
-} | |
- | |
-//============================================================================== | |
-// Update the bits omitted in compute_final_gt. | |
-//============================================================================== | |
-void compute_final_gt_last_bits(long text_length, long max_block_size, | |
- long mb_beg, long mb_end, bitvector *gt, const bitvector *undecided, | |
- bool *all_decided) { | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- if (!all_decided[0]) { | |
- long block_end = text_length - (n_blocks - 1) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long this_block_size = block_end - block_beg; | |
- long this_mb_beg = mb_beg; | |
- long this_mb_end = std::min(mb_end, this_block_size); | |
- | |
- long rev_beg = text_length - block_end; | |
- long rev_end = text_length - block_beg; | |
- | |
- long temp_this_mb_beg = this_mb_beg; | |
- while (((rev_end - 1 - temp_this_mb_beg) & 7) != 7) ++temp_this_mb_beg; | |
- this_mb_end = temp_this_mb_beg; | |
- | |
- // [this_mb_beg..this_mb_end) were omitted. | |
- for (long j = this_mb_beg; j < this_mb_end; ++j) | |
- if (undecided->get(rev_end - 1 - j) && gt->get(rev_beg - 1 - j)) | |
- gt->set(rev_end - 1 - j); | |
- } | |
-} | |
- | |
-//============================================================================== | |
-// Fully parallel computation of gt bitvectors. | |
-//============================================================================== | |
-void compute_initial_gt_bitvectors(const unsigned char *text, long text_length, | |
- bitvector *gt, long max_block_size, long max_threads, long text_end, | |
- long supertext_length, const multifile *tail_gt_begin_reversed, | |
- background_block_reader *tail_prefix_background_reader, | |
- const unsigned char *tail_prefix_preread) { | |
- long double start; | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: compute gt bitvectors, some bits may still be undecided after this. | |
- //---------------------------------------------------------------------------- | |
- | |
- // Allocate ane zero-initialize (in parallel) bitvectors. | |
- fprintf(stderr, " Allocating: "); | |
- start = utils::wclock(); | |
- bitvector *undecided = new bitvector(text_length); | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- // all_decided[i] == true, if all bits inside block i were | |
- // decided in the first stage. This can be used by threads in the | |
- // second stage to completely skip inspecting some blocks. | |
- bool *all_decided = new bool[n_blocks]; | |
- | |
- // Process blocks right-to-left. | |
- fprintf(stderr, " Computing decided bits: "); | |
- start = utils::wclock(); | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- | |
- // Compute bitvectors 'gt' and 'undecided' for block i. | |
- threads[i] = new std::thread(compute_partial_gt_end, | |
- text, text_length, block_beg, block_end, max_block_size, gt, | |
- undecided, std::ref(all_decided[i]), text_end, supertext_length, | |
- tail_gt_begin_reversed, tail_prefix_background_reader, | |
- tail_prefix_preread); | |
- } | |
- | |
- // Wait for the threads to finish and clean up. | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: compute the undecided bits in the gt bitvectors. | |
- //---------------------------------------------------------------------------- | |
- | |
- // The size of micro block has to be a multiple of 8, otherwise two | |
- // threads might try to update the same char inside bitvector. | |
- long max_microblock_size = (max_block_size + max_threads - 1) / max_threads; | |
- while ((max_microblock_size & 7) && max_microblock_size < max_block_size) | |
- ++max_microblock_size; | |
- long n_microblocks = (max_block_size + max_microblock_size - 1) / max_microblock_size; | |
- | |
- fprintf(stderr, " Computing undecided bits: "); | |
- start = utils::wclock(); | |
- threads = new std::thread*[n_microblocks]; | |
- for (long i = 0; i < n_microblocks; ++i) { | |
- long mb_beg = i * max_microblock_size; | |
- long mb_end = std::min(mb_beg + max_microblock_size, max_block_size); | |
- | |
- threads[i] = new std::thread(compute_final_gt, text_length, max_block_size, | |
- mb_beg, mb_end, std::ref(gt), std::ref(undecided), all_decided); | |
- } | |
- | |
- // Wait for the threads to finish and clean up. | |
- for (long i = 0; i < n_microblocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_microblocks; ++i) delete threads[i]; | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- // Fill in the skipped (due to parallel byte access issue) undecided bits. | |
- for (long i = 0; i < n_microblocks; ++i) { | |
- long mb_beg = i * max_microblock_size; | |
- long mb_end = std::min(mb_beg + max_microblock_size, max_block_size); | |
- | |
- compute_final_gt_last_bits(text_length, max_block_size, mb_beg, mb_end, | |
- gt, undecided, all_decided); | |
- } | |
- | |
- fprintf(stderr, " Deallocating: "); | |
- start = utils::wclock(); | |
- delete[] threads; | |
- delete undecided; | |
- delete[] all_decided; | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_COMPUTE_INITIAL_GT_BITVECTORS_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/divsufsort_template.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/divsufsort_template.h | |
deleted file mode 100644 | |
index 86c5c903..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/divsufsort_template.h | |
+++ /dev/null | |
@@ -1,69 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/divsufsort_template.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_DIVSUFSORT_TEMPLATE_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_DIVSUFSORT_TEMPLATE_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
- | |
-#include "divsufsort.h" | |
-#include "divsufsort64.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename T> | |
-void run_divsufsort(const unsigned char *, T*, T) { | |
- fprintf(stderr, "\ndivsufsort: non-standard call. Use either" | |
- "int or long for second and third argument.\n"); | |
- std::exit(EXIT_FAILURE); | |
-} | |
- | |
-template<> | |
-void run_divsufsort(const unsigned char *text, int *sa, int length) { | |
- divsufsort(text, sa, length); | |
-} | |
- | |
-template<> | |
-void run_divsufsort(const unsigned char *text, long *sa, long length) { | |
- divsufsort64(text, sa, length); | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_DIVSUFSORT_TEMPLATE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h | |
deleted file mode 100644 | |
index b298f9d2..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h | |
+++ /dev/null | |
@@ -1,301 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INITIAL_PARTIAL_SUFSORT_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INITIAL_PARTIAL_SUFSORT_H_INCLUDED | |
- | |
-#include <algorithm> | |
-#include <thread> | |
- | |
-#include "../bitvector.h" | |
-#include "divsufsort_template.h" | |
-#include "bwtsa.h" | |
-#include "parallel_shrink.h" | |
-#include "parallel_expand.h" | |
-#include "parallel_copy.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-//============================================================================== | |
-// Rename the given block using its gt bitvector. | |
-//============================================================================== | |
-void rename_block(unsigned char *text, long text_length, long block_beg, | |
- long block_length, bitvector *gt, bool &renaming_error) { | |
- long block_end = block_beg + block_length; | |
- long beg_rev = text_length - block_end; | |
- unsigned char *block = text + block_beg; | |
- unsigned char last = block[block_length - 1]; | |
- bool err = false; | |
- for (long i = 0; i + 1 < block_length; ++i) | |
- if (block[i] > last || (block[i] == last && gt->get(beg_rev + i + 1))) { | |
- if (block[i] == 255) | |
- err = true; | |
- ++block[i]; | |
- } | |
- if (block[block_length - 1] == 255) | |
- err = true; | |
- ++block[block_length - 1]; | |
- | |
- if (err) | |
- renaming_error = true; | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Re-rename block back to original. | |
-//============================================================================== | |
-void rerename_block(unsigned char *block, long block_length) { | |
- unsigned char last = block[block_length - 1] - 1; | |
- for (long i = 0; i < block_length; ++i) | |
- if (block[i] > last) --block[i]; | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Given gt bitvectors, compute partial suffix arrays of blocks. | |
-//============================================================================== | |
-template<typename saidx_t> | |
-void initial_partial_sufsort(unsigned char *, long, bitvector *, | |
- bwtsa_t<saidx_t> *, long, long, bool) { | |
- fprintf(stderr, "Error: initial_partial_sufsort: given saidx_t is " | |
- "not supported, sizeof(saidx_t) = %ld\n", (long)sizeof(saidx_t)); | |
- std::exit(EXIT_FAILURE); | |
-} | |
- | |
-template<> | |
-void initial_partial_sufsort(unsigned char *text, long text_length, | |
- bitvector* gt, bwtsa_t<uint40> *bwtsa, long max_block_size, | |
- long max_threads, bool has_tail) { | |
- long double start = utils::wclock(); | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Rename the blocks in parallel. | |
- //---------------------------------------------------------------------------- | |
- | |
- if (n_blocks > 1 || has_tail) { | |
- fprintf(stderr, " Renaming blocks: "); | |
- start = utils::wclock(); | |
- bool *renaming_error = new bool[n_blocks]; | |
- std::fill(renaming_error, renaming_error + n_blocks, false); | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(rename_block, text, text_length, block_beg, | |
- block_size, gt, std::ref(renaming_error[i])); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- bool err = false; | |
- for (long i = 0; i < n_blocks; ++i) | |
- if (renaming_error[i]) err = true; | |
- delete[] renaming_error; | |
- | |
- if (err) { | |
- fprintf(stdout, "\n\nError: byte with value 255 was detected in the input text!\n" | |
- "See the section on limitations in the README for more information.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- if (max_block_size >= (2L << 30)) { // Use 64-bit divsufsort. | |
- fprintf(stdout, "\nError: 2GiB+ partial suffix arrays are not " | |
- "yet supported by the internal-memory pSAscan.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } else { // Use 32-bit divsufsort. | |
- int *temp_sa = (int *)bwtsa; | |
- | |
- //-------------------------------------------------------------------------- | |
- // STEP 2: Compute suffix arrays in parallel. | |
- //-------------------------------------------------------------------------- | |
- fprintf(stderr, " Running divsufsort32 in parallel: "); | |
- start = utils::wclock(); | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(run_divsufsort<int>, | |
- text + block_beg, temp_sa + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- fprintf(stderr, " Expanding 32-bit integers to bwtsa objects: "); | |
- start = utils::wclock(); | |
- parallel_expand<int, bwtsa_t<uint40> >(temp_sa, text_length, max_threads); | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: Restore the original text. | |
- //---------------------------------------------------------------------------- | |
- if (n_blocks > 1 || has_tail) { | |
- fprintf(stderr, " Rerenaming blocks: "); | |
- start = utils::wclock(); | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(rerename_block, | |
- text + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- } | |
-} | |
- | |
-template<> | |
-void initial_partial_sufsort(unsigned char *text, long text_length, | |
- bitvector* gt, bwtsa_t<int> *bwtsa, long max_block_size, long max_threads, | |
- bool has_tail) { | |
- long double start = utils::wclock(); | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Rename the blocks in parallel. | |
- //---------------------------------------------------------------------------- | |
- if (n_blocks > 1 || has_tail) { | |
- fprintf(stderr, " Renaming blocks: "); | |
- start = utils::wclock(); | |
- bool *renaming_error = new bool[n_blocks]; | |
- std::fill(renaming_error, renaming_error + n_blocks, false); | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(rename_block, text, text_length, block_beg, | |
- block_size, gt, std::ref(renaming_error[i])); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- bool err = false; | |
- for (long i = 0; i < n_blocks; ++i) | |
- if (renaming_error[i]) err = true; | |
- delete[] renaming_error; | |
- | |
- if (err) { | |
- fprintf(stdout, "\n\nError: byte with value 255 was detected in the input text!\n" | |
- "See the section on limitations in the README for more information.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- int *temp_sa = (int *)bwtsa; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: Compute suffix arrays in parallel. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, " Running divsufsort32 in parallel: "); | |
- start = utils::wclock(); | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(run_divsufsort<int>, | |
- text + block_beg, temp_sa + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- fprintf(stderr, " Expanding 32-bit integers to bwtsa objects: "); | |
- start = utils::wclock(); | |
- parallel_expand<int, bwtsa_t<int> >(temp_sa, text_length, max_threads); | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: Restore the original text. | |
- //---------------------------------------------------------------------------- | |
- if (n_blocks > 1 || has_tail) { | |
- fprintf(stderr, " Rerenaming blocks: "); | |
- start = utils::wclock(); | |
- threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(rerename_block, | |
- text + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- } | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INITIAL_PARTIAL_SUFSORT_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h | |
deleted file mode 100644 | |
index c88901f8..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h | |
+++ /dev/null | |
@@ -1,90 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWT_FROM_SA_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWT_FROM_SA_H_INCLUDED | |
- | |
-#include <algorithm> | |
-#include <thread> | |
- | |
-#include "../utils.h" | |
-#include "bwtsa.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename saidx_t> | |
-void compute_bwt_in_bwtsa_aux(const unsigned char *text, long beg, | |
- long end, bwtsa_t<saidx_t> *dest, long *i0) { | |
- *i0 = -1; | |
- for (long j = beg; j < end; ++j) { | |
- if (dest[j].sa) dest[j].bwt = text[dest[j].sa - 1]; | |
- else { dest[j].bwt = 0; *i0 = j; } | |
- } | |
-} | |
- | |
-template<typename saidx_t> | |
-void compute_bwt_in_bwtsa(const unsigned char *text, long length, | |
- bwtsa_t<saidx_t> *dest, long max_threads, long &result) { | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_blocks = (length + max_block_size - 1) / max_block_size; | |
- long *index_0 = new long[n_blocks]; | |
- | |
- // Compute bwt and find i0, where sa[i0] == 0. | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- | |
- threads[i] = new std::thread(compute_bwt_in_bwtsa_aux<saidx_t>, | |
- text, block_beg, block_end, dest, index_0 + i); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- // Find and return i0. | |
- result = -1; | |
- for (long i = 0; i < n_blocks; ++i) | |
- if (index_0[i] != -1) result = index_0[i]; | |
- delete[] index_0; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWT_FROM_SA_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h | |
deleted file mode 100644 | |
index 108638a5..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h | |
+++ /dev/null | |
@@ -1,200 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWTSA_MERGE_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWTSA_MERGE_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <vector> | |
-#include <string> | |
-#include <algorithm> | |
- | |
-#include "../bitvector.h" | |
-#include "../multifile.h" | |
-#include "inmem_gap_array.h" | |
-#include "inmem_compute_gap.h" | |
-#include "parallel_merge.h" | |
-#include "pagearray.h" | |
-#include "bwtsa.h" | |
-#include "merge_schedule.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename saidx_t, unsigned pagesize_log> | |
-pagearray<bwtsa_t<saidx_t>, pagesize_log> *inmem_bwtsa_merge( | |
- const unsigned char *text, | |
- long text_length, | |
- bwtsa_t<saidx_t> *bwtsa, | |
- bitvector *gt, | |
- long max_block_size, | |
- long range_beg, | |
- long range_end, | |
- long max_threads, | |
- bool need_gt, | |
- bool need_bwt, | |
- long &result_i0, | |
- MergeSchedule &schedule, | |
- long text_beg, | |
- long text_end, | |
- long supertext_length, | |
- std::string supertext_filename, | |
- const multifile *tail_gt_begin_reversed, | |
- long *i0_array, | |
- long **block_rank_matrix) { | |
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_type; | |
- | |
- long shift = (max_block_size - text_length % max_block_size) % max_block_size; | |
- long range_size = range_end - range_beg; | |
- | |
- if (range_size == 1) { | |
- long block_beg = range_beg * max_block_size; | |
- long block_end = block_beg + max_block_size; | |
- block_beg = std::max(0L, block_beg - shift); | |
- block_end -= shift; | |
- | |
- result_i0 = i0_array[range_beg]; | |
- pagearray_type *bwtsa_pagearray = | |
- new pagearray_type(bwtsa + block_beg, bwtsa + block_end); | |
- return bwtsa_pagearray; | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Split the blocks in the left and right group. | |
- //---------------------------------------------------------------------------- | |
- long lrange_size = schedule.left_size(range_size); | |
- long rrange_size = range_size - lrange_size; | |
- | |
- long lrange_beg = range_beg; | |
- long lrange_end = range_beg + lrange_size; | |
- long rrange_beg = lrange_end; | |
- long rrange_end = rrange_beg + rrange_size; | |
- | |
- long lbeg = lrange_beg * max_block_size; | |
- long rbeg = rrange_beg * max_block_size; | |
- long lend = rbeg; | |
- long rend = rbeg + rrange_size * max_block_size; | |
- lbeg = std::max(0L, lbeg - shift); | |
- rbeg -= shift; | |
- lend -= shift; | |
- rend -= shift; | |
- | |
- long lsize = lend - lbeg; | |
- long rsize = rend - rbeg; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: Compute partial SAs and BWTs for left and right block. | |
- //---------------------------------------------------------------------------- | |
- | |
- // 2.a | |
- // | |
- // Left block | |
- long left_i0; | |
- pagearray_type *l_bwtsa = inmem_bwtsa_merge<saidx_t, pagesize_log>(text, | |
- text_length, bwtsa, gt, max_block_size, lrange_beg, lrange_end, | |
- max_threads, need_gt, true, left_i0, schedule, text_beg, text_end, | |
- supertext_length, supertext_filename, tail_gt_begin_reversed, i0_array, | |
- block_rank_matrix); | |
- | |
- // 2.b | |
- // | |
- // Right block | |
- long right_i0; | |
- pagearray_type *r_bwtsa = inmem_bwtsa_merge<saidx_t, pagesize_log>(text, | |
- text_length, bwtsa, gt, max_block_size, rrange_beg, rrange_end, | |
- max_threads, true, need_bwt, right_i0, schedule, text_beg, text_end, | |
- supertext_length, supertext_filename, tail_gt_begin_reversed, i0_array, | |
- block_rank_matrix); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: Merge partial SAs and BWTs. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, "Merging blocks %ld-%ld with %ld-%ld\n", | |
- lrange_beg + 1, lrange_end, rrange_beg + 1, rrange_end); | |
- long double start = utils::wclock(); | |
- | |
- // 3.a | |
- // | |
- // Compute gap | |
- fprintf(stderr, " Computing gap:\n"); | |
- inmem_gap_array *gap; | |
- long double rank_init_time; | |
- long double streaming_time; | |
- long double start1 = utils::wclock(); | |
- inmem_compute_gap<saidx_t, pagesize_log>(text, text_length, lbeg, lsize, | |
- rsize, *l_bwtsa, gt, gap, max_threads, need_gt, left_i0, (1L << 21), | |
- rank_init_time, streaming_time, block_rank_matrix, lrange_beg, | |
- lrange_size, rrange_size); | |
- fprintf(stderr, " Time: %.2Lf\n", utils::wclock() - start1); | |
- | |
- // 3.b | |
- // | |
- // Merge partial SAs and BWTs | |
- fprintf(stderr, " Merging SA/BWT: "); | |
- start1 = utils::wclock(); | |
- long delta_i0; | |
- if (need_bwt) | |
- (*r_bwtsa)[right_i0].bwt = text[rbeg - 1]; | |
- pagearray_type *result = parallel_merge(l_bwtsa, r_bwtsa, gap, | |
- max_threads, left_i0, delta_i0, lsize); | |
- result_i0 = left_i0 + delta_i0; | |
- long double merging_time = utils::wclock() - start1; | |
- fprintf(stderr, "total: %.2Lf\n", merging_time); | |
- | |
- // 3.c | |
- // | |
- // Clean up. | |
- start1 = utils::wclock(); | |
- delete l_bwtsa; | |
- delete r_bwtsa; | |
- delete gap; | |
- long double cleaning_time = utils::wclock() - start1; | |
- if (cleaning_time > 0.2L) | |
- fprintf(stderr, "Cleaning: %.2Lf\n", cleaning_time); | |
- | |
- long double time_per_elem_left = merging_time / (lsize + rsize) + rank_init_time / lsize; | |
- long double time_per_elem_right = merging_time / (lsize + rsize) + streaming_time / rsize; | |
- long double ratio = time_per_elem_right / time_per_elem_left; | |
- fprintf(stderr, "Time: %.2Lf (rl_ratio = %.3Lf)\n", | |
- utils::wclock() - start, ratio); | |
- | |
- return result; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWTSA_MERGE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_gap.h | |
deleted file mode 100644 | |
index 95a1b4e5..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_gap.h | |
+++ /dev/null | |
@@ -1,297 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_compute_gap.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_GAP_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_GAP_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <map> | |
-#include <vector> | |
-#include <thread> | |
-#include <algorithm> | |
- | |
-#include "../bitvector.h" | |
-#include "../gap_buffer.h" | |
-#include "../multifile.h" | |
-#include "rank.h" | |
-#include "inmem_gap_array.h" | |
-#include "inmem_compute_initial_ranks.h" | |
-#include "inmem_stream.h" | |
-#include "inmem_update.h" | |
-#include "inmem_bwt_from_sa.h" | |
-#include "pagearray.h" | |
-#include "bwtsa.h" | |
-#include "sparse_isa.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename saidx_t, unsigned pagesize_log> | |
-void inmem_compute_gap(const unsigned char *text, long text_length, long left_block_beg, | |
- long left_block_size, long right_block_size, | |
- const pagearray<bwtsa_t<saidx_t>, pagesize_log> &bwtsa, | |
- bitvector *gt, inmem_gap_array* &gap, long max_threads, bool need_gt, long i0, | |
- long gap_buf_size, long double &rank_init_time, long double &streaming_time, | |
- long **block_rank_matrix, long lrange_beg, long lrange_size, long rrange_size) { | |
- long lrange_end = lrange_beg + lrange_size; | |
- long rrange_end = lrange_end + rrange_size; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: build rank data structure over BWT. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, " Building rank: "); | |
- long double start = utils::wclock(); | |
- typedef rank4n<saidx_t, pagesize_log> rank_type; | |
- rank_type *rank = new rank_type(&bwtsa, left_block_size, max_threads); | |
- rank_init_time = utils::wclock() - start; | |
- fprintf(stderr, "total: %.2Lf\n", rank_init_time); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: compute symbol counts and the last symbol of the left block. | |
- //---------------------------------------------------------------------------- | |
- long *count = new long[256]; | |
- const unsigned char *left_block = text + left_block_beg; | |
- std::copy(rank->m_count, rank->m_count + 256, count); | |
- unsigned char last = left_block[left_block_size - 1]; | |
- ++count[last]; | |
- --count[0]; | |
- for (long i = 0, s = 0, t; i < 256; ++i) | |
- { t = count[i]; count[i] = s; s += t; } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: compute starting positions for all streaming threads. | |
- //---------------------------------------------------------------------------- | |
- long left_block_end = left_block_beg + left_block_size; | |
- long right_block_beg = left_block_end; | |
- long right_block_end = left_block_end + right_block_size; | |
- | |
- long max_stream_block_size = (right_block_size + max_threads - 1) / max_threads; | |
- while (max_stream_block_size & 7) ++max_stream_block_size; | |
- long n_threads = (right_block_size + max_stream_block_size - 1) / max_stream_block_size; | |
- | |
- fprintf(stderr, " Computing initial ranks: "); | |
- start = utils::wclock(); | |
- std::vector<long> initial_ranks(n_threads); | |
- std::vector<std::pair<long, long> > initial_ranges(n_threads); | |
- std::thread **threads = new std::thread*[n_threads]; | |
- | |
- // 3.a | |
- // | |
- // Compute the last starting position using the matrix of initial ranks. | |
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_bwtsa_type; | |
- long last_stream_block_beg = right_block_beg + (n_threads - 1) * max_stream_block_size; | |
- long last_stream_block_end = right_block_end; | |
- | |
- initial_ranks[n_threads - 1] = 0L; | |
- for (long j = lrange_beg; j < lrange_end; ++j) | |
- initial_ranks[n_threads - 1] += block_rank_matrix[j][rrange_end - 1]; | |
- | |
- // 3.b | |
- // | |
- // Compute the starting position for all | |
- // starting positions other than the last one. | |
- long prev_stream_block_size = last_stream_block_end - last_stream_block_beg; | |
- for (long i = n_threads - 2; i >= 0; --i) { | |
- long stream_block_beg = right_block_beg + i * max_stream_block_size; | |
- long stream_block_end = std::min(stream_block_beg + max_stream_block_size, right_block_end); | |
- long stream_block_size = stream_block_end - stream_block_beg; | |
- const unsigned char *pat = text + stream_block_end; | |
- | |
- threads[i] = new std::thread(compute_range<pagearray_bwtsa_type>, | |
- text, left_block_beg, left_block_size, pat, prev_stream_block_size, | |
- std::ref(bwtsa), std::ref(initial_ranges[i])); | |
- | |
- prev_stream_block_size = stream_block_size; | |
- } | |
- | |
- for (long i = 0; i + 1 < n_threads; ++i) threads[i]->join(); | |
- for (long i = 0; i + 1 < n_threads; ++i) delete threads[i]; | |
- delete[] threads; | |
- fprintf(stderr, "%.2Lf ", utils::wclock() - start); | |
- | |
- bool nontrivial_range = false; | |
- for (long j = 0; j < n_threads - 1; ++j) | |
- if (initial_ranges[j].first != initial_ranges[j].second) | |
- nontrivial_range = true; | |
- | |
- if (nontrivial_range) { | |
- // 3.c | |
- // | |
- // Build the data structure allowing answering ISA queries. | |
- start = utils::wclock(); | |
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_type; | |
- typedef sparse_isa<pagearray_type, rank_type, 12U> sparse_isa_type; | |
- sparse_isa_type *sp_isa = new sparse_isa_type(&bwtsa, text + | |
- left_block_beg, rank, left_block_size, i0, max_threads); | |
- fprintf(stderr, "%.3Lf ", utils::wclock() - start); | |
- | |
- // 3.d | |
- // | |
- // Narrow nontrivial ranges to single elements. | |
- start = utils::wclock(); | |
- prev_stream_block_size = last_stream_block_end - last_stream_block_beg; | |
- long prev_rank = initial_ranks[n_threads - 1]; | |
- for (long i = n_threads - 2; i >= 0; --i) { | |
- long stream_block_beg = right_block_beg + i * max_stream_block_size; | |
- long stream_block_end = std::min(stream_block_beg + max_stream_block_size, right_block_end); | |
- long stream_block_size = stream_block_end - stream_block_beg; | |
- long suf_start = stream_block_end; | |
- | |
- long left = initial_ranges[i].first; | |
- long right = initial_ranges[i].second; | |
- | |
- // Keep refining the range [left..right) until it's empty. | |
- while (left != right) { | |
- // Valid values for mid are in [left..right). | |
- long mid = (left + right) / 2; | |
- | |
- // Check if suffix starting at position suf_start is larger | |
- // than the one starting at block_beg + bwtsa[mid].sa in the text. | |
- // We know they have a common prefix of length prev_stream_block_size. | |
- if ((long)bwtsa[mid].sa + prev_stream_block_size >= left_block_size) { | |
- if (gt->get(text_length - 1 - (suf_start + left_block_size - (long)bwtsa[mid].sa - 1))) left = mid + 1; | |
- else right = mid; | |
- } else { | |
- long j = bwtsa[mid].sa + prev_stream_block_size; | |
- if (sp_isa->query(j) < prev_rank) left = mid + 1; | |
- else right = mid; | |
- } | |
- } | |
- | |
- initial_ranks[i] = left; | |
- prev_rank = left; | |
- prev_stream_block_size = stream_block_size; | |
- } | |
- | |
- delete sp_isa; | |
- fprintf(stderr, "%.3Lf ", utils::wclock() - start); | |
- } else { | |
- for (long j = 0; j + 1 < n_threads; ++j) | |
- initial_ranks[j] = initial_ranges[j].first; | |
- } | |
- fprintf(stderr, "\n"); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 4: allocate gap array. The gap array is indexed from 0 to | |
- // left_block_size so the number of elements is left_block_size + 1. | |
- //---------------------------------------------------------------------------- | |
- start = utils::wclock(); | |
- gap = new inmem_gap_array(left_block_size + 1); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 5: allocate buffers, buffer polls and auxiliary arrays. | |
- //---------------------------------------------------------------------------- | |
- | |
- // Allocate gap buffers. | |
- long n_gap_buffers = 2 * n_threads; | |
- gap_buffer<saidx_t> **gap_buffers = new gap_buffer<saidx_t>*[n_gap_buffers]; | |
- for (long i = 0; i < n_gap_buffers; ++i) | |
- gap_buffers[i] = new gap_buffer<saidx_t>(gap_buf_size, max_threads); | |
- | |
- // Create poll of empty and full buffers. | |
- gap_buffer_poll<saidx_t> *empty_gap_buffers = new gap_buffer_poll<saidx_t>(); | |
- gap_buffer_poll<saidx_t> *full_gap_buffers = new gap_buffer_poll<saidx_t>(n_threads); | |
- | |
- // Add empty buffers to empty poll. | |
- for (long i = 0; i < n_gap_buffers; ++i) | |
- empty_gap_buffers->add(gap_buffers[i]); | |
- | |
- // Allocate temp arrays and oracles. | |
- long max_buffer_elems = gap_buf_size / sizeof(saidx_t); | |
- saidx_t *temp = (saidx_t *)malloc(max_buffer_elems * n_threads * sizeof(saidx_t)); | |
- int *oracle = (int *)malloc(max_buffer_elems * n_threads * sizeof(int)); | |
- long double allocations_time = utils::wclock() - start; | |
- if (allocations_time > 0.05L) | |
- fprintf(stderr, " Allocations: %.2Lf\n", allocations_time); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 6: run the parallel streaming. | |
- //---------------------------------------------------------------------------- | |
- | |
- // Start streaming threads. | |
- fprintf(stderr, " Streaming: "); | |
- start = utils::wclock(); | |
- threads = new std::thread*[n_threads]; | |
- for (long t = 0; t < n_threads; ++t) { | |
- long beg = right_block_beg + t * max_stream_block_size; | |
- long end = std::min(beg + max_stream_block_size, right_block_end); | |
- | |
- threads[t] = new std::thread(inmem_parallel_stream<rank_type, saidx_t>, | |
- text, text_length, beg, end, last, count, full_gap_buffers, | |
- empty_gap_buffers, initial_ranks[t], i0, rank, gap->m_length, max_threads, | |
- gt, temp + t * max_buffer_elems, oracle + t * max_buffer_elems, need_gt); | |
- } | |
- | |
- // Start updating thread. | |
- std::thread *updater = new std::thread(inmem_gap_updater<saidx_t>, | |
- full_gap_buffers, empty_gap_buffers, gap, max_threads); | |
- | |
- // Wait to all threads to finish. | |
- for (long t = 0; t < n_threads; ++t) threads[t]->join(); | |
- updater->join(); | |
- streaming_time = utils::wclock() - start; | |
- long double streaming_speed = | |
- (right_block_size / (1024.L * 1024)) / streaming_time; | |
- fprintf(stderr, "%.2Lf (%.2LfMiB/s)\n", streaming_time, | |
- streaming_speed); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 7: clean up and sort gap->m_excess. | |
- //---------------------------------------------------------------------------- | |
- start = utils::wclock(); | |
- free(oracle); | |
- free(temp); | |
- for (long i = 0; i < n_threads; ++i) delete threads[i]; | |
- for (long i = 0; i < n_gap_buffers; ++i) delete gap_buffers[i]; | |
- delete updater; | |
- delete[] threads; | |
- delete[] gap_buffers; | |
- delete empty_gap_buffers; | |
- delete full_gap_buffers; | |
- delete rank; | |
- delete[] count; | |
- | |
- std::sort(gap->m_excess.begin(), gap->m_excess.end()); | |
- | |
- long double cleaning_time = utils::wclock() - start; | |
- if (cleaning_time > 0.1L) | |
- fprintf(stderr, " Cleaning: %.2Lf\n", cleaning_time); | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_GAP_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h | |
deleted file mode 100644 | |
index 98a4ac1b..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h | |
+++ /dev/null | |
@@ -1,922 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h | |
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_INITIAL_RANKS_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_INITIAL_RANKS_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <string> | |
- | |
-#include "../background_block_reader.h" | |
-#include "../multifile.h" | |
-#include "../multifile_bit_stream_reader.h" | |
-#include "bwtsa.h" | |
-#include "pagearray.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-// #define BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- | |
-inline int lcp_compare(const unsigned char *text, long text_length, | |
- const unsigned char *pat, long pat_length, long gt_begin_length, | |
- long j, multifile_bit_stream_reader &rev_gt_begin_reader, long &lcp) { | |
- while (lcp < pat_length && j + lcp < text_length && pat[lcp] == text[j + lcp]) | |
- ++lcp; | |
- | |
- if (j + lcp >= text_length) { | |
- if (rev_gt_begin_reader.access(gt_begin_length - (text_length - j))) return 1; | |
- else return -1; | |
- } else if (lcp == pat_length) return 0; | |
- else { | |
- if (pat[lcp] < text[j + lcp]) return -1; | |
- else return 1; | |
- } | |
-} | |
- | |
-inline int lcp_compare(const unsigned char *text, const unsigned char *pat, | |
- long pat_length, long j, long &lcp) { | |
- while (lcp < pat_length && pat[lcp] == text[j + lcp]) ++lcp; | |
- if (lcp == pat_length) return 0; | |
- else if (pat[lcp] < text[j + lcp]) return -1; | |
- else return 1; | |
-} | |
- | |
-//------------------------------------------------------------------------------ | |
-// Find the range [left..right) of suffixes starting inside the block that are | |
-// prefixed with pat[0..pat_length). In case there is no such suffix, left == | |
-// right and they both point to the first suffix larger than the pattern. | |
-//------------------------------------------------------------------------------ | |
-template<typename pagearray_type> | |
-void compute_range(const unsigned char *text, long block_beg, long block_size, | |
- const unsigned char *pat, long pat_length, const pagearray_type &bwtsa, | |
- std::pair<long, long> &ret) { | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- long min_discrepancy = utils::random_long(0L, 10L); | |
- long balancing_factor = utils::random_long(1L, 10L); | |
-#else | |
- static const long min_discrepancy = (1L << 16); | |
- static const long balancing_factor = 64L; | |
-#endif | |
- | |
- // Find left. | |
- long low = -1L, high = block_size; | |
- long llcp = 0, rlcp = 0; | |
- while (low + 1 != high) { | |
- // Invariant: left is in the range (low..high]. | |
- long lcp = std::min(llcp, rlcp); | |
- | |
- // Compute mid. | |
- // Valid values for mid are: low + 1, .., high - 1. | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- // Choose the pivot that split the range into two | |
- // parts of sizes with ratio equal to logd / d. | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else // Discrepancy is too small, use standard binary search. | |
- mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, pat, pat_length, block_beg + (long)bwtsa[mid].sa, lcp) <= 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- long left = high; | |
- | |
- // Find right. | |
- if (rlcp == pat_length) { | |
- high = block_size; | |
- rlcp = 0; | |
- | |
- while (low + 1 != high) { | |
- // Invariant: right is in the range (low..high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, pat, pat_length, block_beg + (long)bwtsa[mid].sa, lcp) < 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- } | |
- long right = high; | |
- | |
- ret = std::make_pair(left, right); | |
-} | |
- | |
-//------------------------------------------------------------------------------ | |
-// On the entry to the function: | |
-// - all suffixes in the range [0..left) are smaller than pat[0..old_pat_length), | |
-// - all suffixes in the range [right..text_length) are larger than the pattern, | |
-// - suffixes in the range [left..right) are unknown -- they can either be | |
-// larger or smaller than the pattern, or equal -- in any case, they have a | |
-// common prefix of length `old_pat_length' with the pattern. | |
-//------------------------------------------------------------------------------ | |
-template<typename saidx_t> | |
-void refine_range(const unsigned char *text, long block_beg, | |
- const bwtsa_t<saidx_t> *block_psa, long left, long right, | |
- long old_pat_length, long pat_length, const unsigned char *pat, | |
- long &newleft, long &newright) { | |
- long low = left - 1; | |
- long high = right; | |
- long llcp = old_pat_length; | |
- long rlcp = old_pat_length; | |
- | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- long min_discrepancy = utils::random_long(0L, 10L); | |
- long balancing_factor = utils::random_long(1L, 10L); | |
-#else | |
- static const long min_discrepancy = (1L << 16); | |
- static const long balancing_factor = 64L; | |
-#endif | |
- | |
- while (low + 1 != high) { | |
- // Invariant: newleft is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, pat, pat_length, block_beg + block_psa[mid].sa, lcp) <= 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- newleft = high; | |
- | |
- if (rlcp >= pat_length) { | |
- high = right; | |
- rlcp = old_pat_length; | |
- | |
- while (low + 1 != high) { | |
- // Invariant: newright is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, pat, pat_length, block_beg + block_psa[mid].sa, lcp) < 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- } | |
- newright = high; | |
-} | |
- | |
-template<typename saidx_t> | |
-void refine_range(const unsigned char *text, long text_length, | |
- long tail_gt_begin_reversed_length, long block_beg, | |
- const bwtsa_t<saidx_t> *block_psa, long left, long right, | |
- const multifile *tail_gt_begin_reversed, | |
- long old_pat_length, long pat_length, | |
- const unsigned char *pat, long &newleft, long &newright) { | |
- multifile_bit_stream_reader reader(tail_gt_begin_reversed); | |
- | |
- long low = left - 1; | |
- long high = right; | |
- long llcp = old_pat_length; | |
- long rlcp = old_pat_length; | |
- | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- long min_discrepancy = utils::random_long(0L, 10L); | |
- long balancing_factor = utils::random_long(1L, 10L); | |
-#else | |
- static const long min_discrepancy = (1L << 16); | |
- static const long balancing_factor = 64L; | |
-#endif | |
- | |
- while (low + 1 != high) { | |
- // Invariant: newleft is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, text_length, pat, pat_length, tail_gt_begin_reversed_length, | |
- block_beg + block_psa[mid].sa, reader, lcp) <= 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- newleft = high; | |
- | |
- if (rlcp >= pat_length) { | |
- high = right; | |
- rlcp = old_pat_length; | |
- | |
- while (low + 1 != high) { | |
- // Invariant: newright is in the range (low, high]. | |
- long lcp = std::min(llcp, rlcp); | |
- long mid = 0L; | |
- if (llcp + min_discrepancy < rlcp) { | |
- long d = rlcp - llcp; | |
- long logd = utils::log2ceil(d); | |
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else if (rlcp + min_discrepancy < llcp) { | |
- long d = llcp - rlcp; | |
- long logd = utils::log2ceil(d); | |
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd); | |
- } else mid = (low + high) / 2; | |
- | |
- if (lcp_compare(text, text_length, pat, pat_length, tail_gt_begin_reversed_length, | |
- block_beg + block_psa[mid].sa, reader, lcp) < 0) { | |
- high = mid; | |
- rlcp = lcp; | |
- } else { | |
- low = mid; | |
- llcp = lcp; | |
- } | |
- } | |
- } | |
- newright = high; | |
-} | |
- | |
-//============================================================================== | |
-// Variant 1: compute ranges for columns other than the last two. | |
-//============================================================================== | |
-template<typename saidx_t> | |
-void compute_ranges_1(const unsigned char *text, long text_length, | |
- const bwtsa_t<saidx_t> *bwtsa, long max_block_size, | |
- std::pair<long, long> **primary_range, | |
- std::pair<long, long> **secondary_range, | |
- long row, long column) { | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- long block_end = text_length - (n_blocks - 1 - row) * max_block_size; | |
- long block_begin = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_begin; | |
- long pat_start = text_length - (n_blocks - 1 - column) * max_block_size; | |
- | |
- const unsigned char *pat = text + pat_start; | |
- const bwtsa_t<saidx_t> *block_psa = bwtsa + block_begin; | |
- | |
- // Check that 0 <= row < column < n_blocks - 2 and | |
- // pat_start + 2 * max_block_size <= text_length. | |
- if (0 > row || row >= column || column >= n_blocks - 2 || | |
- pat_start + 2L * max_block_size > text_length) { | |
- fprintf(stdout, "\nError: invariant in compute_ranges_1 failed.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- long left = 0L; | |
- long right = block_size; | |
- long cur_pat_length = 0L; | |
- | |
- // Compute the primary range. | |
- { | |
- long new_pat_length = max_block_size; | |
- if (left != right && cur_pat_length < new_pat_length) { | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, block_begin, block_psa, left, right, | |
- cur_pat_length, new_pat_length, pat, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- } | |
- cur_pat_length = new_pat_length; | |
- } | |
- primary_range[row][column] = std::make_pair(left, right); | |
- | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- // Verify the primary range. | |
- { | |
- long smaller = 0L; | |
- long equal = 0L; | |
- for (long j = block_begin; j < block_end; ++j) { | |
- long lcp = 0L; | |
- while (lcp < max_block_size && text[j + lcp] == pat[lcp]) ++lcp; | |
- if (lcp == max_block_size) ++equal; | |
- else if (text[j + lcp] < pat[lcp]) ++smaller; | |
- } | |
- long check_left = smaller; | |
- long check_right = smaller + equal; | |
- if (primary_range[row][column] != std::make_pair(check_left, check_right)) { | |
- fprintf(stdout, "\nError: incorrect primary range!\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
-#endif | |
- | |
- // Compute secondary range. | |
- { | |
- long new_pat_length = cur_pat_length + max_block_size; | |
- if (left != right && cur_pat_length < new_pat_length) { | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, block_begin, block_psa, left, right, | |
- cur_pat_length, new_pat_length, pat, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- } | |
- cur_pat_length = new_pat_length; | |
- } | |
- secondary_range[row][column] = std::make_pair(left, right); | |
- | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- // Verify the secondary range. | |
- { | |
- long smaller = 0L; | |
- long equal = 0L; | |
- for (long j = block_begin; j < block_end; ++j) { | |
- long lcp = 0L; | |
- while (lcp < cur_pat_length && text[j + lcp] == pat[lcp]) ++lcp; | |
- if (lcp == cur_pat_length) ++equal; | |
- else if (text[j + lcp] < pat[lcp]) ++smaller; | |
- } | |
- long check_left = smaller; | |
- long check_right = smaller + equal; | |
- if (secondary_range[row][column] != std::make_pair(check_left, check_right)) { | |
- fprintf(stdout, "\nError: incorrect secondary range!\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
-#endif | |
-} | |
- | |
-//============================================================================== | |
-// Variant 2: compute primary and secondary range for second to last column. | |
-//============================================================================== | |
-template<typename saidx_t> | |
-void compute_ranges_2(const unsigned char *text, long text_length, | |
- long text_beg, long supertext_length, const bwtsa_t<saidx_t> *bwtsa, | |
- long max_block_size, background_block_reader *reader, | |
- const unsigned char *next_block, | |
- std::pair<long, long> **primary_range, | |
- std::pair<long, long> **secondary_range, | |
- long row, long column) { | |
- long text_end = text_beg + text_length; | |
- long tail_length = supertext_length - text_end; | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- long block_end = text_length - (n_blocks - 1 - row) * max_block_size; | |
- long block_begin = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_begin; | |
- long pat_start = text_length - (n_blocks - 1 - column) * max_block_size; | |
- | |
- const unsigned char *pat = text + pat_start; | |
- const bwtsa_t<saidx_t> *block_psa = bwtsa + block_begin; | |
- | |
- // Check that 0 <= row < column and column == n_blocks - 2 | |
- // and pat_start + max_block_size == text_length. | |
- if (0 > row || row >= column || column != n_blocks - 2 || | |
- pat_start + max_block_size != text_length) { | |
- fprintf(stdout, "\nError: invariant in compute_ranges_2 failed.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- long left = 0L; | |
- long right = block_size; | |
- long cur_pat_length = 0L; | |
- | |
- // Compute primary range. | |
- { | |
- long new_pat_length = max_block_size; | |
- if (left != right && cur_pat_length < new_pat_length) { | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, block_begin, block_psa, left, right, | |
- cur_pat_length, new_pat_length, pat, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- } | |
- cur_pat_length = new_pat_length; | |
- } | |
- primary_range[row][column] = std::make_pair(left, right); | |
- | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- // Verify the primary range. | |
- { | |
- long smaller = 0L; | |
- long equal = 0L; | |
- for (long j = block_begin; j < block_end; ++j) { | |
- long lcp = 0L; | |
- while (lcp < cur_pat_length && text[j + lcp] == pat[lcp]) ++lcp; | |
- if (lcp == cur_pat_length) ++equal; | |
- else if (text[j + lcp] < pat[lcp]) ++smaller; | |
- } | |
- long check_left = smaller; | |
- long check_right = smaller + equal; | |
- if (primary_range[row][column] != std::make_pair(check_left, check_right)) { | |
- fprintf(stdout, "\nError: incorrect primary range!\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
-#endif | |
- | |
- static const long chunk_size = (1L << 20); | |
- | |
- // Compute secondary range. | |
- long pat_length = cur_pat_length + std::min(tail_length, max_block_size); | |
- if (reader) { | |
- // The reader != NULL, meaning that we have to gradually refine the range. | |
- while (left != right && cur_pat_length < pat_length) { | |
- long next_chunk = std::min(chunk_size, pat_length - cur_pat_length); | |
- long new_pat_length = cur_pat_length + next_chunk; | |
- reader->wait(new_pat_length - max_block_size); | |
- | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, block_begin, block_psa, left, right, cur_pat_length, | |
- new_pat_length, reader->m_data - max_block_size, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- cur_pat_length = new_pat_length; | |
- } | |
- } else { | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- // This version extends the range chunk by chunk (using random chunk | |
- // lengths) even if the whole next block is available. This is for | |
- // debugging purpose. | |
- while (left != right && cur_pat_length < pat_length) { | |
- long next_chunk = utils::random_long(1L, pat_length - cur_pat_length); | |
- long new_pat_length = cur_pat_length + next_chunk; | |
- | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, block_begin, block_psa, left, right, cur_pat_length, | |
- new_pat_length, next_block - max_block_size, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- cur_pat_length = new_pat_length; | |
- } | |
-#else | |
- // The whole next block is available, we can just do one binary search. | |
- long new_pat_length = pat_length; | |
- if (left != right && cur_pat_length < new_pat_length) { | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, block_begin, block_psa, left, right, cur_pat_length, | |
- new_pat_length, next_block - max_block_size, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- } | |
- cur_pat_length = new_pat_length; | |
-#endif | |
- } | |
- secondary_range[row][column] = std::make_pair(left, right); | |
-} | |
- | |
-//============================================================================== | |
-// Variant 3: compute primary and secondary range for the last column. | |
-//============================================================================== | |
-template<typename saidx_t> | |
-void compute_ranges_3(const unsigned char *text, long text_length, | |
- long text_beg, long supertext_length, const bwtsa_t<saidx_t> *bwtsa, | |
- long max_block_size, const multifile *tail_gt_begin_reversed, | |
- background_block_reader *reader, const unsigned char *next_block, | |
- std::pair<long, long> **primary_range, | |
- std::pair<long, long> **secondary_range, | |
- long row, long column) { | |
- long text_end = text_beg + text_length; | |
- long tail_length = supertext_length - text_end; | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- long block_end = text_length - (n_blocks - 1 - row) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- const bwtsa_t<saidx_t> *block_psa = bwtsa + block_beg; | |
- long first_range_pat_length = std::min(max_block_size, tail_length); | |
- | |
- // length of text stored in next_block (if not NULL) | |
- long pat_length = std::min(text_length, tail_length); | |
- | |
- // Note: max_block_size <= text_length thus | |
- // first_range_pat_length <= pat_length | |
- | |
- // Invariant: one of the following cases hold: | |
- // (1) next_block != NULL and reader == NULL and next_block stores | |
- // std::min(text_length, tail_length) symbols after text | |
- // (2) next_block == NULL and reader != NULL and reader will read | |
- // std::min(text_length, tail_length) symbols after text | |
- | |
- // Check that 0 <= row < colum and column == n_blocks - 1. | |
- if (0 > row || row >= column || column != n_blocks - 1) { | |
- fprintf(stdout, "\nError: invariant 1 in compute_ranges_3 failed.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- long left = 0L; | |
- long right = block_size; | |
- long cur_pat_length = 0L; | |
- | |
- static const long chunk_size = (1L << 20); | |
- | |
- // Compute the primary range. | |
- if (reader) { | |
- // The reader != NULL, meaning that we have to gradually refine the range. | |
- while (left != right && cur_pat_length < first_range_pat_length) { | |
- long next_chunk = std::min(chunk_size, | |
- first_range_pat_length - cur_pat_length); | |
- long new_pat_length = cur_pat_length + next_chunk; | |
- reader->wait(new_pat_length); | |
- | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, text_length, tail_length, block_beg, block_psa, | |
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length, | |
- reader->m_data, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- cur_pat_length = new_pat_length; | |
- } | |
- } else { | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- // This version extends the range chunk by chunk (using random chunk | |
- // lengths) even if the whole next block is available. This is for | |
- // debugging purpose. | |
- while (left != right && cur_pat_length < first_range_pat_length) { | |
- long next_chunk = utils::random_long(1L, | |
- first_range_pat_length - cur_pat_length); | |
- long new_pat_length = cur_pat_length + next_chunk; | |
- | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, text_length, tail_length, block_beg, block_psa, | |
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length, | |
- next_block, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- cur_pat_length = new_pat_length; | |
- } | |
-#else | |
- // The whole next block is available, we can just do one binary search. | |
- long new_pat_length = first_range_pat_length; | |
- if (left != right && cur_pat_length < new_pat_length) { | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, text_length, tail_length, block_beg, block_psa, | |
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length, | |
- next_block, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- } | |
- cur_pat_length = new_pat_length; | |
-#endif | |
- } | |
- primary_range[row][column] = std::make_pair(left, right); | |
- | |
- // Compute the secondary range. | |
- if (reader) { | |
- // The reader != NULL, meaning that we have to gradually refine the range. | |
- while (left != right && cur_pat_length < pat_length) { | |
- long next_chunk = std::min(chunk_size, pat_length - cur_pat_length); | |
- long new_pat_length = cur_pat_length + next_chunk; | |
- reader->wait(new_pat_length); | |
- | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, text_length, tail_length, block_beg, block_psa, | |
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length, | |
- reader->m_data, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- cur_pat_length = new_pat_length; | |
- } | |
- } else { | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- // This version extends the range chunk by chunk (using random chunk | |
- // lengths) even if the whole next block is available. This is for | |
- // debugging purpose. | |
- while (left != right && cur_pat_length < pat_length) { | |
- long next_chunk = utils::random_long(1L, pat_length - cur_pat_length); | |
- long new_pat_length = cur_pat_length + next_chunk; | |
- | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, text_length, tail_length, block_beg, block_psa, | |
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length, | |
- next_block, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- cur_pat_length = new_pat_length; | |
- } | |
-#else | |
- // The whole next block is available, we can just do one binary search. | |
- long new_pat_length = pat_length; | |
- if (left != right && cur_pat_length < new_pat_length) { | |
- long newleft = 0L; | |
- long newright = 0L; | |
- refine_range(text, text_length, tail_length, block_beg, block_psa, | |
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length, | |
- next_block, newleft, newright); | |
- left = newleft; | |
- right = newright; | |
- } | |
- cur_pat_length = new_pat_length; | |
-#endif | |
- } | |
- secondary_range[row][column] = std::make_pair(left, right); | |
- | |
- if (left != right && text_length <= tail_length) { | |
- fprintf(stdout, "\nError: left != right && text_length <= tail_length.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
-} | |
- | |
-template<typename saidx_t> | |
-void task_solver_code(const unsigned char *text, | |
- long text_length, const bwtsa_t<saidx_t> *bwtsa, | |
- long max_block_size, | |
- std::pair<long, long> **primary_range, | |
- std::pair<long, long> **secondary_range, | |
- std::vector<std::pair<long, long> > &tasks, | |
- std::mutex &tasks_mutex) { | |
- while (true) { | |
- // Get a task from the task collection. | |
- std::pair<long, long> task; | |
- bool task_avail = true; | |
- std::unique_lock<std::mutex> lk(tasks_mutex); | |
- if (tasks.empty()) task_avail = false; | |
- else { | |
- task = tasks.back(); | |
- tasks.pop_back(); | |
- } | |
- lk.unlock(); | |
- | |
- if (!task_avail) break; | |
- | |
- // Solve the task and save the answer. | |
- compute_ranges_1(text, text_length, bwtsa, max_block_size, | |
- primary_range, secondary_range, task.first, task.second); | |
- } | |
-} | |
- | |
-template<typename saidx_t> | |
-void compute_block_rank_matrix(const unsigned char *text, long text_length, | |
- const bwtsa_t<saidx_t> *bwtsa, long max_block_size, long text_beg, | |
- long supertext_length, std::string, | |
- const multifile *tail_gt_begin_reversed, background_block_reader *reader, | |
- const unsigned char *next_block, long **block_rank_matrix) { | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- long text_end = text_beg + text_length; | |
- long tail_length = supertext_length - text_end; | |
- | |
- // Allocate primary and secondary ranges. | |
- std::pair<long, long> **primary_range = new std::pair<long, long>*[n_blocks]; | |
- std::pair<long, long> **secondary_range = new std::pair<long, long>*[n_blocks]; | |
- for (long row = 0; row < n_blocks; ++row) { | |
- primary_range[row] = new std::pair<long, long>[n_blocks]; | |
- secondary_range[row] = new std::pair<long, long>[n_blocks]; | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Start the threads computing ranges for the last column | |
- //---------------------------------------------------------------------------- | |
- std::thread **threads_last_col = NULL; | |
- if (n_blocks > 1) { | |
- threads_last_col = new std::thread*[n_blocks - 1]; | |
- for (long row = 0; row + 1 < n_blocks; ++row) { | |
- long column = n_blocks - 1; | |
- threads_last_col[row] = new std::thread(compute_ranges_3<saidx_t>, text, | |
- text_length, text_beg, supertext_length, bwtsa, max_block_size, | |
- tail_gt_begin_reversed, reader, next_block, primary_range, | |
- secondary_range, row, column); | |
- } | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: Start the threads computing ranges for the second-to-last column. | |
- //---------------------------------------------------------------------------- | |
- std::thread **threads_second_last_col = NULL; | |
- if (n_blocks > 2) { | |
- threads_second_last_col = new std::thread*[n_blocks - 2]; | |
- for (long row = 0; row + 2 < n_blocks; ++row) { | |
- long column = n_blocks - 2; | |
- threads_second_last_col[row] = new std::thread(compute_ranges_2<saidx_t>, | |
- text, text_length, text_beg, supertext_length, bwtsa, max_block_size, | |
- reader, next_block, primary_range, secondary_range, row, column); | |
- } | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: Start threads computing columns other than the last two. | |
- //---------------------------------------------------------------------------- | |
- std::vector<std::pair<long, long> > tasks; | |
- std::mutex tasks_mutex; | |
- for (long row = 0; row < n_blocks; ++row) | |
- for (long col = row + 1; col + 2 < n_blocks; ++col) | |
- tasks.push_back(std::make_pair(row, col)); | |
- std::random_shuffle(tasks.begin(), tasks.end()); // solve in any order | |
- std::thread **threads_other = new std::thread*[n_blocks]; | |
- for (long t = 0; t < n_blocks; ++t) | |
- threads_other[t] = new std::thread(task_solver_code<saidx_t>, text, | |
- text_length, bwtsa, max_block_size, primary_range, secondary_range, | |
- std::ref(tasks), std::ref(tasks_mutex)); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 4: Wait for all threads to finish. | |
- //---------------------------------------------------------------------------- | |
- | |
- // 4.1 | |
- // | |
- // Wait for the threads computing columns other than last two. | |
- for (long t = 0; t < n_blocks; ++t) threads_other[t]->join(); | |
- for (long t = 0; t < n_blocks; ++t) delete threads_other[t]; | |
- delete[] threads_other; | |
- | |
- // 4.2 | |
- // | |
- // Wait for the threads computing second-to-last column to finish. | |
- if (n_blocks > 2) { | |
- for (long row = 0; row + 2 < n_blocks; ++row) | |
- threads_second_last_col[row]->join(); | |
- for (long row = 0; row + 2 < n_blocks; ++row) | |
- delete threads_second_last_col[row]; | |
- delete[] threads_second_last_col; | |
- } | |
- | |
- // 4.3 | |
- // | |
- // Wait for the threads computing the last column to finish. | |
- if (n_blocks > 1) { | |
- for (long row = 0; row + 1 < n_blocks; ++row) threads_last_col[row]->join(); | |
- for (long row = 0; row + 1 < n_blocks; ++row) delete threads_last_col[row]; | |
- delete[] threads_last_col; | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 5: Compute the rank values from primary and secondary ranges. | |
- //---------------------------------------------------------------------------- | |
- for (long row = n_blocks - 1; row >= 0; --row) { | |
- for (long col = n_blocks - 1; col > row; --col) { | |
- long left = secondary_range[row][col].first; | |
- long right = secondary_range[row][col].second; | |
- | |
- if (col != n_blocks - 1 && | |
- (col != n_blocks - 2 || tail_length >= max_block_size)) { | |
- long cur_block_end = text_length - (n_blocks - 1 - row) * max_block_size; | |
- long cur_block_beg = std::max(0L, cur_block_end - max_block_size); | |
- long cur_block_size = cur_block_end - cur_block_beg; | |
- long shift = max_block_size - cur_block_size; | |
- long next_block_end = text_length - (n_blocks - 1 - (row + 1)) * max_block_size; | |
- long next_block_beg = std::max(0L, next_block_end - max_block_size); | |
- | |
- const bwtsa_t<saidx_t> *cur_block_psa = bwtsa + cur_block_beg; | |
- const bwtsa_t<saidx_t> *next_block_psa = bwtsa + next_block_beg; | |
- | |
- // Compute the ranges. | |
- long next_primary_range_beg = primary_range[row + 1][col + 1].first; | |
- long next_primary_range_end = primary_range[row + 1][col + 1].second; | |
- long next_primary_range_size = next_primary_range_end - | |
- next_primary_range_beg; | |
- | |
- // Compute the difference of the arithmetic progression. | |
- long delta = 0L; | |
- long next_psa_first = 0L; | |
- long next_psa_second = 0L; | |
- if (next_primary_range_size > 1) { | |
- next_psa_first = next_block_psa[next_primary_range_beg].sa; | |
- next_psa_second = next_block_psa[next_primary_range_beg + 1].sa; | |
- delta = next_psa_second - next_psa_first; | |
- } | |
- | |
- // Invariant: | |
- // 1. the primary range of next block contains (possibly | |
- // zero) values forming an arithmetic progression, | |
- // 2. elements in the range [left..right) of the psa of the | |
- // current block incremented by `shift' appear in the primary | |
- // range of the next block. | |
- | |
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE | |
- // Check that both invariants hold. | |
- for (long j = next_primary_range_end; j + 1 < next_primary_range_end; ++j) | |
- if ((long)next_block_psa[j + 1].sa - (long)next_block_psa[j].sa != delta) { | |
- fprintf(stdout, "Invariant 1 failed.\n"); std::exit(EXIT_FAILURE); } | |
- for (long j = left; j < right; ++j) { | |
- long suf = cur_block_psa[j].sa + shift; | |
- bool found = false; | |
- for (long jj = next_primary_range_beg; jj < next_primary_range_end; ++jj) | |
- if ((long)next_block_psa[jj].sa == suf) { found = true; break; } | |
- if (!found) { | |
- fprintf(stdout, "Invariant 2 failed.\n"); | |
- std::fflush(stdout); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
-#endif | |
- | |
- // Keep refining the range [left..right) until it's empty. | |
- while (left != right) { | |
- // Valid values for mid are in [left..right). | |
- long mid = (left + right) / 2; | |
- long suf = (long)cur_block_psa[mid].sa + shift; | |
- | |
- // Locate suf in next_block_psa using invariants 1. and 2. | |
- long pos = next_primary_range_beg; | |
- if (next_primary_range_size > 1) | |
- pos += (suf - next_psa_first) / delta; | |
- | |
- // Refine the range. | |
- if (pos < block_rank_matrix[row + 1][col + 1]) left = mid + 1; | |
- else right = mid; | |
- } | |
- } | |
- | |
- block_rank_matrix[row][col] = left; | |
- } | |
- } | |
- | |
- // Clean up. | |
- for (long row = 0; row < n_blocks; ++row) { | |
- delete[] primary_range[row]; | |
- delete[] secondary_range[row]; | |
- } | |
- delete[] primary_range; | |
- delete[] secondary_range; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_INITIAL_RANKS_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_gap_array.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_gap_array.h | |
deleted file mode 100644 | |
index 7b0b0381..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_gap_array.h | |
+++ /dev/null | |
@@ -1,213 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_gap_array.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_SASCAN_INMEM_GAP_ARRAY_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_SASCAN_INMEM_GAP_ARRAY_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <vector> | |
-#include <algorithm> | |
-#include <mutex> | |
-#include <stack> | |
-#include <thread> | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-struct inmem_gap_array { | |
- unsigned char *m_count; | |
- long m_length; | |
- | |
- std::vector<long> m_excess; | |
- std::mutex m_excess_mutex; | |
- | |
- inmem_gap_array(long length) | |
- : m_length(length) { | |
- m_count = (unsigned char *)calloc(m_length, sizeof(unsigned char)); | |
- } | |
- | |
- ~inmem_gap_array() { | |
- free(m_count); | |
- } | |
- | |
- //============================================================================== | |
- // Find and smallest j such that j + gap[0] + .. + gap[j] >= a. Store | |
- // the value of j into b and gap[0] + .. + gap[j] into c. To speed up the | |
- // algorithm, we have array gapsum defined as | |
- // | |
- // gapsum[i] = gap[0] + .. + gap[i * block_size - 1]. | |
- // | |
- //============================================================================== | |
- static void answer_single_gap_query(const inmem_gap_array *gap, long block_size, | |
- const long *gapsum, long a, long &b, long &c) { | |
- long n_blocks = (gap->m_length + block_size - 1) / block_size; | |
- | |
- // Find the block containing the correct index. To do that find the largest | |
- // j such that gapsum[j] + block_size * j - 1 < a and start searching from | |
- // j * block_size. | |
- long j = 0; | |
- while (j + 1 < n_blocks && gapsum[j + 1] + block_size * (j + 1) - 1 < a) ++j; | |
- // Invariant: the j we are searching for is > j * block_size - 1. | |
- | |
- long sum = gapsum[j]; | |
- j = block_size * j; | |
- size_t excess_ptr = std::lower_bound(gap->m_excess.begin(), | |
- gap->m_excess.end(), j) - gap->m_excess.begin(); | |
- while (true) { | |
- // Invariant: sum = gap[0] + .. + gap[j - 1]. | |
- // Compute gap[j] using small gap array representation. | |
- long gap_j = gap->m_count[j]; | |
- while (excess_ptr < gap->m_excess.size() && gap->m_excess[excess_ptr] == j) { | |
- gap_j += 256L; | |
- ++excess_ptr; | |
- } | |
- | |
- if (j + sum + gap_j >= a) { b = j; c = sum + gap_j; return; } | |
- else { sum += gap_j; ++j; } | |
- } | |
- } | |
- | |
- //============================================================================== | |
- // Compute gap[0] + gap[1] + .. + gap[j - 1] with the help of gapsum array. | |
- //============================================================================== | |
- static long compute_sum3(const inmem_gap_array *gap, long j, | |
- long max_block_size, long *gapsum) { | |
- long block_id = j / max_block_size; | |
- long result = gapsum[block_id]; | |
- | |
- long scan_beg = block_id * max_block_size; | |
- long scan_end = j; | |
- long occ = std::upper_bound(gap->m_excess.begin(), gap->m_excess.end(), scan_end - 1) | |
- - std::lower_bound(gap->m_excess.begin(), gap->m_excess.end(), scan_beg); | |
- result += 256L * std::max(0L, occ); | |
- for (long i = block_id * max_block_size; i < j; ++i) | |
- result += gap->m_count[i]; | |
- | |
- return result; | |
- } | |
- | |
- //============================================================================== | |
- // Compute sum of gap values for blocks in range [range_beg..range_end). | |
- // The sum for each block is stored in gapsum array. | |
- //============================================================================== | |
- static void compute_sum2(const inmem_gap_array *gap, long range_beg, | |
- long range_end, long max_block_size, long *gapsum) { | |
- for (long block_id = range_beg; block_id < range_end; ++block_id) { | |
- long block_beg = block_id * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, gap->m_length); | |
- | |
- // Process block. | |
- long occ = std::upper_bound(gap->m_excess.begin(), gap->m_excess.end(), block_end - 1) | |
- - std::lower_bound(gap->m_excess.begin(), gap->m_excess.end(), block_beg); | |
- long block_gap_sum = 256L * std::max(0L, occ); | |
- for (long j = block_beg; j < block_end; ++j) | |
- block_gap_sum += gap->m_count[j]; | |
- | |
- gapsum[block_id] = block_gap_sum; | |
- } | |
- } | |
- | |
- //============================================================================== | |
- // Parallel computaton of answers to n_queries queries of the form: | |
- // What is the smallest j such that j + gap[0] + .. + gap[j] >= a[i]" | |
- // - the answer to i-th query is stored in b[i] | |
- // - in addition we also return gap[0] + .. + gap[j] in c[i] | |
- // | |
- // To do that we first split the gap array into blocks of size of about | |
- // length / max_threads and (in parallel) compute sums of gap values inside | |
- // these blocks. We the accumulate these sums into array of prefix sums. | |
- // | |
- // To answer each of the queries we start a separate thread. Each thread uses | |
- // the partial sums of gap array at block boundaries to find a good starting | |
- // point for search and then scans the gap array from there. | |
- //============================================================================== | |
- long answer_queries(long n_queries, const long *a, long *b, long *c, | |
- long max_threads, long i0) const { | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: split gap array into at most max_threads blocks | |
- // and in parallel compute sum of values inside each block. | |
- //---------------------------------------------------------------------------- | |
- long max_block_size = std::min(4L << 20, (m_length + max_threads - 1) / max_threads); | |
- long n_blocks = (m_length + max_block_size - 1) / max_block_size; | |
- long *gapsum = new long[n_blocks]; | |
- | |
- // Each thread handles range of blocks. | |
- long range_size = (n_blocks + max_threads - 1) / max_threads; | |
- long n_ranges = (n_blocks + range_size - 1) / range_size; | |
- std::thread **threads = new std::thread*[max_threads]; | |
- for (long range_id = 0; range_id < n_ranges; ++range_id) { | |
- long range_beg = range_id * range_size; | |
- long range_end = std::min(range_beg + range_size, n_blocks); | |
- | |
- threads[range_id] = new std::thread(compute_sum2, this, | |
- range_beg, range_end, max_block_size, gapsum); | |
- } | |
- for (long i = 0; i < n_ranges; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_ranges; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: compute partial sum from block counts. | |
- //---------------------------------------------------------------------------- | |
- for (long i = 0, s = 0, t; i < n_blocks; ++i) | |
- { t = gapsum[i]; gapsum[i] = s; s += t; } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: Answer the queries in parallel. | |
- //---------------------------------------------------------------------------- | |
- threads = new std::thread*[n_queries]; | |
- for (long i = 0; i < n_queries; ++i) | |
- threads[i] = new std::thread(answer_single_gap_query, this, | |
- max_block_size, gapsum, a[i], std::ref(b[i]), std::ref(c[i])); | |
- for (long i = 0; i < n_queries; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_queries; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- long result = -1; | |
- if (i0 != -1) | |
- result = compute_sum3(this, i0 + 1, max_block_size, gapsum); | |
- | |
- delete[] gapsum; | |
- | |
- return result; | |
- } | |
-}; | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_GAP_ARRAY_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_psascan.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_psascan.h | |
deleted file mode 100644 | |
index a71347c8..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_psascan.h | |
+++ /dev/null | |
@@ -1,309 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_psascan.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_PSASCAN_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_PSASCAN_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cmath> | |
-#include <vector> | |
-#include <limits> | |
-#include <algorithm> | |
- | |
-#include "../bitvector.h" | |
-#include "../multifile.h" | |
-#include "../background_block_reader.h" | |
-#include "inmem_gap_array.h" | |
-#include "compute_initial_gt_bitvectors.h" | |
-#include "initial_partial_sufsort.h" | |
-#include "change_gt_reference_point.h" | |
-#include "inmem_bwt_from_sa.h" | |
-#include "inmem_compute_initial_ranks.h" | |
-#include "parallel_merge.h" | |
-#include "inmem_bwtsa_merge.h" | |
-#include "pagearray.h" | |
-#include "bwtsa.h" | |
-#include "parallel_shrink.h" | |
-#include "merge_schedule.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename saidx_t, unsigned pagesize_log = 12> | |
-void inmem_psascan( | |
- unsigned char *text, | |
- long text_length, | |
- unsigned char *sa_bwt, | |
- long max_threads = 1, | |
- bool compute_bwt = false, | |
- bool compute_gt_begin = false, | |
- bitvector *gt_begin = NULL, | |
- long max_blocks = -1, | |
- long text_beg = 0, | |
- long text_end = 0, | |
- long supertext_length = 0, | |
- std::string supertext_filename = "", | |
- const multifile *tail_gt_begin_reversed = NULL, | |
- long *i0 = NULL, | |
- unsigned char *tail_prefix_preread = NULL) { | |
- static const unsigned pagesize = (1U << pagesize_log); | |
- long double absolute_start = utils::wclock(); | |
- long double start; | |
- | |
- if ((long)std::numeric_limits<saidx_t>::max() < text_length) { | |
- fprintf(stderr, "Error: text is too long (%ld bytes),\n", text_length); | |
- fprintf(stderr, " std::numeric_limits<saidx_t>::max() = %ld\n", | |
- (long)std::numeric_limits<saidx_t>::max()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- if (max_blocks == -1) | |
- max_blocks = max_threads; | |
- | |
- if (text_end == 0) { | |
- supertext_length = text_length; | |
- text_end = text_length; | |
- text_beg = 0; | |
- supertext_filename = ""; | |
- tail_gt_begin_reversed = NULL; | |
- } | |
- | |
- bool has_tail = (text_end != supertext_length); | |
- | |
- if (!has_tail && tail_prefix_preread != NULL) { | |
- fprintf(stderr, "Error: has_tail == false but tail_prefix_preread != NULL\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- long alignment_unit = (long)std::max(pagesize, 8U); | |
- long max_block_size = (text_length + max_blocks - 1) / max_blocks; | |
- while ((max_block_size & (alignment_unit - 1)) && max_block_size < text_length) | |
- ++max_block_size; | |
- | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- | |
- if (!compute_gt_begin) { | |
- if (gt_begin) { | |
- fprintf(stderr, "Error: check gt_begin == NULL failed\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- if (n_blocks > 1 || has_tail) | |
- gt_begin = new bitvector(text_length); | |
- } else { | |
- if (!gt_begin) { | |
- fprintf(stderr, "inmem_sascan: gt_begin was requested but is not allocated!\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- } | |
- | |
- fprintf(stderr, "Text length = %ld (%.2LfMiB)\n", text_length, text_length / (1024.L * 1024)); | |
- fprintf(stderr, "Max block size = %ld (%.2LfMiB)\n", max_block_size, max_block_size / (1024.L * 1024)); | |
- fprintf(stderr, "Max blocks = %ld\n", max_blocks); | |
- fprintf(stderr, "Number of blocks = %ld\n", n_blocks); | |
- fprintf(stderr, "Max threads = %ld\n", max_threads); | |
- fprintf(stderr, "sizeof(saidx_t) = %lu\n", sizeof(saidx_t)); | |
- fprintf(stderr, "Pagesize = %u\n", (1U << pagesize_log)); | |
- fprintf(stderr, "Compute bwt = %s\n", compute_bwt ? "true" : "false"); | |
- fprintf(stderr, "Compute gt begin = %s\n", compute_gt_begin ? "true" : "false"); | |
- fprintf(stderr, "Text beg = %ld\n", text_beg); | |
- fprintf(stderr, "Text end = %ld\n", text_end); | |
- fprintf(stderr, "Supertext length = %ld (%.2LfMiB)\n", supertext_length, supertext_length / (1024.L * 1024)); | |
- fprintf(stderr, "Supertext filename = %s\n", supertext_filename.c_str()); | |
- fprintf(stderr, "Has tail = %s\n", has_tail ? "true" : "false"); | |
- fprintf(stderr, "\n"); | |
- | |
- bwtsa_t<saidx_t> *bwtsa = (bwtsa_t<saidx_t> *)sa_bwt; | |
- | |
- // Initialize reading of the tail prefix in the background. | |
- long tail_length = supertext_length - text_end; | |
- long tail_prefix_length = std::min(text_length, tail_length); | |
- | |
- background_block_reader *tail_prefix_background_reader = NULL; | |
- if (has_tail && tail_prefix_preread == NULL) | |
- tail_prefix_background_reader = | |
- new background_block_reader(supertext_filename, text_end, tail_prefix_length); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: compute initial bitvectors, and partial suffix arrays. | |
- //---------------------------------------------------------------------------- | |
- if (n_blocks > 1 || compute_gt_begin || has_tail) { | |
- fprintf(stderr, "Compute initial bitvectors:\n"); | |
- start = utils::wclock(); | |
- compute_initial_gt_bitvectors(text, text_length, gt_begin, max_block_size, | |
- max_threads, text_end, supertext_length, tail_gt_begin_reversed, | |
- tail_prefix_background_reader, tail_prefix_preread); | |
- fprintf(stderr, "Time: %.2Lf\n\n", utils::wclock() - start); | |
- } | |
- | |
- fprintf(stderr, "Initial sufsort:\n"); | |
- start = utils::wclock(); | |
- initial_partial_sufsort(text, text_length, gt_begin, bwtsa, max_block_size, max_threads, has_tail); | |
- fprintf(stderr, "Time: %.2Lf\n", utils::wclock() - start); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: compute matrix of block ranks. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, "Compute matrix of initial ranks: "); | |
- start = utils::wclock(); | |
- long **block_rank_matrix = new long*[n_blocks]; | |
- for (long j = 0; j < n_blocks; ++j) | |
- block_rank_matrix[j] = new long[n_blocks]; | |
- compute_block_rank_matrix<saidx_t>(text, text_length, bwtsa, | |
- max_block_size, text_beg, supertext_length, supertext_filename, | |
- tail_gt_begin_reversed, tail_prefix_background_reader, | |
- tail_prefix_preread, block_rank_matrix); | |
- | |
- // Stop reading next block in the background or free memory taken by next block. | |
- if (has_tail) { | |
- if (tail_prefix_background_reader != NULL) { | |
- tail_prefix_background_reader->stop(); | |
- delete tail_prefix_background_reader; | |
- } else free(tail_prefix_preread); | |
- } | |
- | |
- fprintf(stderr, "%.2Lf\n\n", utils::wclock() - start); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: compute the gt bitvectors for blocks that will be on the right | |
- // side during the merging. | |
- //---------------------------------------------------------------------------- | |
- if (n_blocks > 1 || compute_gt_begin) { | |
- fprintf(stderr, "Overwriting gt_end with gt_begin: "); | |
- start = utils::wclock(); | |
- gt_end_to_gt_begin(text, text_length, gt_begin, max_block_size); | |
- fprintf(stderr, "%.2Lf\n\n", utils::wclock() - start); | |
- } | |
- | |
- float rl_ratio = 10.L; // estimated empirically | |
- long max_ram_usage_per_input_byte = 10L; // peak ram usage = 10n | |
- int max_left_size = std::max(1, (int)floor(n_blocks * (((long double)max_ram_usage_per_input_byte - (2.125L + sizeof(saidx_t))) / 5.L))); | |
- fprintf(stderr, "Assumed rl_ratio: %.2f\n", rl_ratio); | |
- fprintf(stderr, "Max left size = %d\n", max_left_size); | |
- fprintf(stderr, "Peak memory usage during last merging = %.3Lfn\n", | |
- (2.125L + sizeof(saidx_t)) + (5.L * max_left_size) / n_blocks); | |
- MergeSchedule schedule(n_blocks, rl_ratio, max_left_size); | |
- | |
- fprintf(stderr, "Skewed merge schedule:\n"); | |
- print_schedule(schedule, n_blocks); | |
- fprintf(stderr, "\n"); | |
- | |
- long *i0_array = new long[n_blocks]; | |
- if (n_blocks > 1 || compute_bwt) { | |
- for (long block_id = 0; block_id < n_blocks; ++block_id) { | |
- long block_end = text_length - (n_blocks - 1 - block_id) * max_block_size; | |
- long block_beg = std::max(0L, block_end - max_block_size); | |
- long block_size = block_end - block_beg; | |
- | |
- if (block_id + 1 != n_blocks || compute_bwt) { | |
- fprintf(stderr, "Computing BWT for block %ld: ", block_id + 1); | |
- long double bwt_start = utils::wclock(); | |
- compute_bwt_in_bwtsa<saidx_t>(text + block_beg, block_size, | |
- bwtsa + block_beg, max_threads, i0_array[block_id]); | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - bwt_start); | |
- } | |
- } | |
- fprintf(stderr, "\n"); | |
- } | |
- | |
- if (n_blocks > 1) { | |
- long i0_result; | |
- pagearray<bwtsa_t<saidx_t>, pagesize_log> *result = | |
- inmem_bwtsa_merge<saidx_t, pagesize_log>(text, text_length, bwtsa, | |
- gt_begin, max_block_size, 0, n_blocks, max_threads, compute_gt_begin, | |
- compute_bwt, i0_result, schedule, text_beg, text_end, | |
- supertext_length, supertext_filename, tail_gt_begin_reversed, | |
- i0_array, block_rank_matrix); | |
- if (i0) *i0 = i0_result; | |
- | |
- // Permute SA to plain array. | |
- fprintf(stderr, "\nPermuting the resulting SA to plain array: "); | |
- start = utils::wclock(); | |
- result->permute_to_plain_array(max_threads); | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- delete result; | |
- } else if (compute_bwt) { | |
- if (i0) *i0 = i0_array[0]; | |
- } | |
- delete[] i0_array; | |
- for (long j = 0; j < n_blocks; ++j) | |
- delete[] block_rank_matrix[j]; | |
- delete[] block_rank_matrix; | |
- | |
- if (!compute_gt_begin && (n_blocks > 1 || has_tail)) { | |
- delete gt_begin; | |
- gt_begin = NULL; | |
- } | |
- | |
- unsigned char *bwt = NULL; | |
- if (compute_bwt) { | |
- // Allocate aux, copy bwt into aux. | |
- fprintf(stderr, "Copying bwtsa.bwt into aux memory: "); | |
- start = utils::wclock(); | |
- bwt = (unsigned char *)malloc(text_length); | |
- parallel_copy<bwtsa_t<saidx_t>, unsigned char>(bwtsa, bwt, text_length, max_threads); | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- } | |
- | |
- fprintf(stderr, "Shrinking bwtsa.sa into sa: "); | |
- start = utils::wclock(); | |
- | |
- parallel_shrink<bwtsa_t<saidx_t>, saidx_t>(bwtsa, text_length, max_threads); | |
- | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- | |
- if (compute_bwt) { | |
- // Copy from aux into the end of bwtsa. | |
- fprintf(stderr, "Copying bwt from aux memory to the end of bwtsa: "); | |
- start = utils::wclock(); | |
- unsigned char *dest = (unsigned char *)(((saidx_t *)bwtsa) + text_length); | |
- parallel_copy<unsigned char, unsigned char>(bwt, dest, text_length, max_threads); | |
- free(bwt); | |
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start); | |
- } | |
- | |
- long double total_sascan_time = utils::wclock() - absolute_start; | |
- fprintf(stderr, "\nTotal time:\n"); | |
- fprintf(stderr, "\tabsolute: %.2Lf\n", total_sascan_time); | |
- fprintf(stderr, "\trelative: %.4Lfs/MiB\n", total_sascan_time / ((long double)text_length / (1 << 20))); | |
- fprintf(stderr, "Speed: %.2LfMiB/s\n", ((long double)text_length / (1 << 20)) / total_sascan_time); | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_PSASCAN_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_stream.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_stream.h | |
deleted file mode 100644 | |
index bc3f4f1c..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_stream.h | |
+++ /dev/null | |
@@ -1,276 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_stream.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_STREAM_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_STREAM_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstring> | |
-#include <iostream> | |
-#include <queue> | |
-#include <string> | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "../bitvector.h" | |
-#include "../gap_buffer.h" | |
-#include "../utils.h" | |
-#include "rank.h" | |
-#include "inmem_update.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-//============================================================================== | |
-// The main streaming function. | |
-// | |
-// Note: | |
-// * it reads and writes bits in range | |
-// [stream_block_beg..stream_block_end) from gt bitvector right to left. | |
-//============================================================================== | |
-template<typename rank_type, typename block_offset_type> | |
-void inmem_parallel_stream( | |
- const unsigned char *text, | |
- long text_length, | |
- long stream_block_beg, | |
- long stream_block_end, | |
- unsigned char last, | |
- const long *count, | |
- gap_buffer_poll<block_offset_type> *full_gap_buffers, | |
- gap_buffer_poll<block_offset_type> *empty_gap_buffers, | |
- block_offset_type i, | |
- block_offset_type i0, | |
- const rank_type *rank, | |
- long gap_range_size, | |
- long n_increasers, | |
- bitvector *gt, | |
- block_offset_type *temp, | |
- int *oracle, | |
- bool need_gt) { | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: initialize structures necessary to do the buffer partitions. | |
- //---------------------------------------------------------------------------- | |
- static const int max_buckets = 4096; | |
- int *block_id_to_sblock_id = new int[max_buckets]; | |
- | |
- long bucket_size = 1; | |
- long bucket_size_bits = 0; | |
- while ((gap_range_size + bucket_size - 1) / bucket_size > max_buckets) | |
- bucket_size <<= 1, ++bucket_size_bits; | |
- long n_buckets = (gap_range_size + bucket_size - 1) / bucket_size; | |
- int *block_count = new int[n_buckets]; | |
- | |
- static const long buffer_sample_size = 512; | |
- std::vector<block_offset_type> samples(buffer_sample_size); | |
- long *ptr = new long[n_increasers]; | |
- block_offset_type *bucket_lbound = new block_offset_type[n_increasers + 1]; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: perform the actual streaming. | |
- //---------------------------------------------------------------------------- | |
- long j = stream_block_end; | |
- bool gt_bit = gt->get(text_length - j); | |
- while (j > stream_block_beg) { | |
- // 2.a | |
- // | |
- // Get a buffer from the poll of empty buffers. | |
- std::unique_lock<std::mutex> lk(empty_gap_buffers->m_mutex); | |
- while (!empty_gap_buffers->available()) empty_gap_buffers->m_cv.wait(lk); | |
- gap_buffer<block_offset_type> *b = empty_gap_buffers->get(); | |
- lk.unlock(); | |
- empty_gap_buffers->m_cv.notify_one(); | |
- | |
- // 2.b | |
- // | |
- // Process buffer, i.e., fill with gap values. | |
- long left = j - stream_block_beg; | |
- b->m_filled = std::min(left, b->m_size); | |
- std::fill(block_count, block_count + n_buckets, 0); | |
- | |
- if (need_gt) { | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- bool new_gt_bit = (i > i0); | |
- if (new_gt_bit) gt->set(text_length - j); | |
- else gt->reset(text_length - j); | |
- | |
- unsigned char c = text[j - 1]; | |
- | |
- // Compute new i. | |
- int delta = (new_gt_bit && c == 0); | |
- i = (block_offset_type)(count[c] + rank->rank(i, c) - delta); | |
- if (c == last && gt_bit) ++i; | |
- | |
- temp[t] = i; | |
- block_count[i >> bucket_size_bits]++; | |
- | |
- --j; | |
- gt_bit = gt->get(text_length - j); | |
- } | |
- } else { | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- bool new_gt_bit = (i > i0); | |
- | |
- unsigned char c = text[j - 1]; | |
- | |
- // Compute new i. | |
- int delta = (new_gt_bit && c == 0); | |
- i = (block_offset_type)(count[c] + rank->rank(i, c) - delta); | |
- if (c == last && gt_bit) ++i; | |
- | |
- temp[t] = i; | |
- block_count[i >> bucket_size_bits]++; | |
- | |
- --j; | |
- gt_bit = gt->get(text_length - j); | |
- } | |
- | |
- } | |
- | |
- // 2.c | |
- // | |
- // Partition the buffer into equal n_increasers parts. | |
- | |
- // Compute super-buckets. | |
- long ideal_sblock_size = (b->m_filled + n_increasers - 1) / n_increasers; | |
- long max_sbucket_size = 0; | |
- long bucket_id_beg = 0; | |
- for (long t = 0; t < n_increasers; ++t) { | |
- long bucket_id_end = bucket_id_beg, size = 0L; | |
- while (bucket_id_end < n_buckets && size < ideal_sblock_size) | |
- size += block_count[bucket_id_end++]; | |
- b->sblock_size[t] = size; | |
- max_sbucket_size = std::min(max_sbucket_size, size); | |
- for (long id = bucket_id_beg; id < bucket_id_end; ++id) | |
- block_id_to_sblock_id[id] = t; | |
- bucket_id_beg = bucket_id_end; | |
- } | |
- | |
- if (max_sbucket_size < 4L * ideal_sblock_size) { | |
- // The quick partition was good enough. | |
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++]) | |
- b->sblock_beg[t] = ptr[t] = curbeg; | |
- | |
- // Permute the elements of the buffer. | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long id = (temp[t] >> bucket_size_bits); | |
- long sblock_id = block_id_to_sblock_id[id]; | |
- oracle[t] = ptr[sblock_id]++; | |
- } | |
- | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long addr = oracle[t]; | |
- b->m_content[addr] = temp[t]; | |
- } | |
- } else { | |
- // Repeat the partition into sbuckets, this time using random sample. | |
- // This is a fallback mechanism in case the quick partition failed, | |
- // and is expected to happen very rarely. | |
- | |
- // Compute random sample of elements in the buffer. | |
- for (long t = 0; t < buffer_sample_size; ++t) | |
- samples[t] = temp[utils::random_long(0L, b->m_filled - 1)]; | |
- std::sort(samples.begin(), samples.end()); | |
- samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); | |
- | |
- // Compute bucket boundaries (lower bound is enough). | |
- std::fill(bucket_lbound, bucket_lbound + n_increasers + 1, gap_range_size); | |
- | |
- long step = (samples.size() + n_increasers - 1) / n_increasers; | |
- for (size_t t = 1, p = step; p < samples.size(); ++t, p += step) | |
- bucket_lbound[t] = (samples[p - 1] + samples[p] + 1) / 2; | |
- bucket_lbound[0] = 0; | |
- | |
- // Compute bucket sizes and sblock id into oracle array. | |
- std::fill(b->sblock_size, b->sblock_size + n_increasers, 0L); | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- block_offset_type x = temp[t]; | |
- int id = n_increasers; | |
- while (bucket_lbound[id] > x) --id; | |
- oracle[t] = id; | |
- b->sblock_size[id]++; | |
- } | |
- | |
- // Permute elements into their own buckets using oracle. | |
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++]) | |
- b->sblock_beg[t] = ptr[t] = curbeg; | |
- | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long sblock_id = oracle[t]; | |
- oracle[t] = ptr[sblock_id]++; | |
- } | |
- | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long addr = oracle[t]; | |
- b->m_content[addr] = temp[t]; | |
- } | |
- } | |
- | |
- // 2.d | |
- // | |
- // Add the buffer to the poll of full buffers and notify waiting thread. | |
- std::unique_lock<std::mutex> lk2(full_gap_buffers->m_mutex); | |
- full_gap_buffers->add(b); | |
- lk2.unlock(); | |
- full_gap_buffers->m_cv.notify_one(); | |
- } | |
- | |
- //--------------------------------------------------------------------------- | |
- // STEP 3: Clean up. | |
- //--------------------------------------------------------------------------- | |
- | |
- // Report that another thread has finished. | |
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex); | |
- full_gap_buffers->increment_finished_workers(); | |
- lk.unlock(); | |
- | |
- // Notify waiting update threads in case no more buffers | |
- // are going to be produced by streaming threads. | |
- full_gap_buffers->m_cv.notify_one(); | |
- | |
- delete[] block_count; | |
- delete[] block_id_to_sblock_id; | |
- delete[] ptr; | |
- delete[] bucket_lbound; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_STREAM_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_update.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_update.h | |
deleted file mode 100644 | |
index a4677d47..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_update.h | |
+++ /dev/null | |
@@ -1,227 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/inmem_update.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_UPDATE_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_UPDATE_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "../gap_buffer.h" | |
-#include "../utils.h" | |
-#include "inmem_gap_array.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-//============================================================================== | |
-// This object creates a given number of threads that will perform gap array | |
-// updates. Most of the time all threads are sleeping on a conditional variable. | |
-// Once the gap buffer is available for processing, they are all woken up and | |
-// perform the update in parallel. The caller then waits until all threads are | |
-// finished and then puts the gap buffer in the poll of empty buffers. | |
-// | |
-// Only one object of this class should exist. | |
-//============================================================================== | |
-template<typename block_offset_type> | |
-struct gap_parallel_updater { | |
- | |
- template<typename T> | |
- static void parallel_update(gap_parallel_updater<T> *updater, int id) { | |
- while (true) { | |
- // Wait until there is a gap buffer available or the | |
- // message 'no more buffers' arrives. | |
- std::unique_lock<std::mutex> lk(updater->m_avail_mutex); | |
- while (!(updater->m_avail[id]) && !(updater->m_avail_no_more)) | |
- updater->m_avail_cv.wait(lk); | |
- | |
- if (!(updater->m_avail[id]) && updater->m_avail_no_more) { | |
- // No more buffers -- exit. | |
- lk.unlock(); | |
- return; | |
- } | |
- | |
- updater->m_avail[id] = false; | |
- lk.unlock(); | |
- | |
- // Safely perform the update. | |
- const gap_buffer<T> *buf = updater->m_buffer; | |
- inmem_gap_array *gap = updater->m_gap_array; | |
- int beg = buf->sblock_beg[id]; | |
- int end = beg + buf->sblock_size[id]; | |
- | |
- for (int i = beg; i < end; ++i) { | |
- T x = buf->m_content[i]; | |
- gap->m_count[x]++; | |
- | |
- // Check if values wrapped-around. | |
- if (gap->m_count[x] == 0) { | |
- gap->m_excess_mutex.lock(); | |
- gap->m_excess.push_back(x); | |
- gap->m_excess_mutex.unlock(); | |
- } | |
- } | |
- | |
- // Update the number of finished threads. | |
- bool finished_last = false; | |
- std::unique_lock<std::mutex> lk2(updater->m_finished_mutex); | |
- updater->m_finished++; | |
- if (updater->m_finished == updater->m_threads_cnt) | |
- finished_last = true; | |
- lk2.unlock(); | |
- | |
- // If this was the last thread finishing, let the caller know. | |
- if (finished_last) | |
- updater->m_finished_cv.notify_one(); | |
- } | |
- } | |
- | |
- gap_parallel_updater(inmem_gap_array *gap_array, int threads_cnt) | |
- : m_gap_array(gap_array), | |
- m_threads_cnt(threads_cnt), | |
- m_avail_no_more(false) { | |
- m_avail = new bool[m_threads_cnt]; | |
- std::fill(m_avail, m_avail + m_threads_cnt, false); | |
- m_threads = new std::thread*[m_threads_cnt]; | |
- | |
- // After this, threads immediately hang up on m_avail_cv. | |
- for (int i = 0; i < m_threads_cnt; ++i) | |
- m_threads[i] = new std::thread(parallel_update<block_offset_type>, this, i); | |
- } | |
- | |
- ~gap_parallel_updater() { | |
- // Signal all threads to finish. | |
- std::unique_lock<std::mutex> lk(m_avail_mutex); | |
- m_avail_no_more = true; | |
- lk.unlock(); | |
- m_avail_cv.notify_all(); | |
- | |
- // Wait until all threads finish and release memory. | |
- for (int i = 0; i < m_threads_cnt; ++i) { | |
- m_threads[i]->join(); | |
- delete m_threads[i]; | |
- } | |
- delete[] m_threads; | |
- delete[] m_avail; | |
- } | |
- | |
- void update(const gap_buffer<block_offset_type> *buffer) { | |
- // Prepare a message for each thread that new buffer is available. | |
- std::unique_lock<std::mutex> lk(m_avail_mutex); | |
- m_finished = 0; | |
- m_buffer = buffer; | |
- for (int i = 0; i < m_threads_cnt; ++i) | |
- m_avail[i] = true; | |
- lk.unlock(); | |
- | |
- // Wake up all threads to perform the update. | |
- m_avail_cv.notify_all(); | |
- | |
- // Wait until all threads report that they are done. | |
- std::unique_lock<std::mutex> lk2(m_finished_mutex); | |
- while (m_finished != m_threads_cnt) | |
- m_finished_cv.wait(lk2); | |
- lk2.unlock(); | |
- | |
- // We are done processing the buffer. The caller of this method | |
- // can now place the buffer into the poll of empty buffers. | |
- } | |
- | |
-private: | |
- inmem_gap_array *m_gap_array; | |
- | |
- std::thread **m_threads; | |
- int m_threads_cnt; | |
- | |
- const gap_buffer<block_offset_type> *m_buffer; | |
- | |
- // For notifying threads about available buffer. | |
- std::mutex m_avail_mutex; | |
- std::condition_variable m_avail_cv; | |
- bool *m_avail; | |
- bool m_avail_no_more; | |
- | |
- // The mutex below is to protect m_finished. The condition | |
- // variable allows the caller to wait (and to be notified when done) | |
- // until threads complete processing their section of the buffer. | |
- int m_finished; | |
- std::mutex m_finished_mutex; | |
- std::condition_variable m_finished_cv; | |
-}; | |
- | |
-template<typename block_offset_type> | |
-void inmem_gap_updater(gap_buffer_poll<block_offset_type> *full_gap_buffers, | |
- gap_buffer_poll<block_offset_type> *empty_gap_buffers, | |
- inmem_gap_array *gap, long n_increasers) { | |
- | |
- gap_parallel_updater<block_offset_type> *updater = | |
- new gap_parallel_updater<block_offset_type>(gap, n_increasers); | |
- | |
- while (true) { | |
- // Get a buffer from the poll of full buffers. | |
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex); | |
- while (!full_gap_buffers->available() && !full_gap_buffers->finished()) | |
- full_gap_buffers->m_cv.wait(lk); | |
- | |
- if (!full_gap_buffers->available() && full_gap_buffers->finished()) { | |
- // There will be no more full buffers -- exit. | |
- lk.unlock(); | |
- break; | |
- } | |
- | |
- gap_buffer<block_offset_type> *b = full_gap_buffers->get(); | |
- lk.unlock(); | |
- | |
- // Process buffer. | |
- updater->update(b); | |
- | |
- // Add the buffer to the poll of empty buffers and notify | |
- // the waiting thread. | |
- std::unique_lock<std::mutex> lk2(empty_gap_buffers->m_mutex); | |
- empty_gap_buffers->add(b); | |
- lk2.unlock(); | |
- empty_gap_buffers->m_cv.notify_one(); | |
- } | |
- | |
- delete updater; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_UPDATE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/merge_schedule.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/merge_schedule.h | |
deleted file mode 100644 | |
index 868163eb..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/merge_schedule.h | |
+++ /dev/null | |
@@ -1,138 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/merge_schedule.h | |
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_MERGE_SCHEDULE_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_MERGE_SCHEDULE_H_INCLUDED | |
- | |
-#include <iostream> | |
-#include <vector> | |
-#include <cassert> | |
-#include <cstdlib> | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-class MergeSchedule { | |
-private: | |
- float rl_ratio; | |
- std::vector<int> split; | |
- std::vector<int> left_cost; | |
- std::vector<int> right_cost; | |
- | |
-public: | |
- MergeSchedule(int no_of_blocks, float right_left_ratio, | |
- int max_left_size = 0) | |
- { reset(no_of_blocks, right_left_ratio, max_left_size); } | |
- | |
- int left_size(int n) const { | |
- assert(n < (long)split.size()); | |
- return split[n]; | |
- } | |
- int right_size(int n) const { | |
- assert(n < (long)split.size()); | |
- return n - split[n]; | |
- } | |
- float cost(int n) const { | |
- assert(n < (long)split.size()); | |
- return (left_cost[n] + rl_ratio * right_cost[n]) / n; | |
- } | |
- float n_left_merges(int n) const { | |
- assert(n < (long)split.size()); | |
- return left_cost[n] / (1.0*n); | |
- } | |
- float n_right_merges(int n) const { | |
- assert(n < (long)split.size()); | |
- return right_cost[n] / (1.0*n); | |
- } | |
- | |
- void reset(int no_of_blocks, float right_left_ratio, | |
- int max_left_size = 0) | |
- { | |
- int n = no_of_blocks; | |
- rl_ratio = right_left_ratio; | |
- if (max_left_size == 0) { | |
- max_left_size = n-1; | |
- } | |
- | |
- split.resize(n+1); | |
- left_cost.resize(n+1); | |
- right_cost.resize(n+1); | |
- | |
- split[1] = 0; | |
- left_cost[1] = 0; | |
- right_cost[1] = 0; | |
- | |
- for (int i=2; i<=n; ++i) { | |
- //int min_l = std::min((i+1)/2, max_left_size); | |
- int max_l = std::min(i-1, max_left_size); | |
- float min_cost = 1E40; | |
- for (int l=1; l<=max_l; ++l) { | |
- int r = i-l; | |
- int l_cost = l + left_cost[l] + left_cost[r]; | |
- int r_cost = r + right_cost[l] + right_cost[r]; | |
- float total_cost = l_cost + rl_ratio * r_cost; | |
- if (total_cost < min_cost) { | |
- min_cost = total_cost; | |
- split[i] = l; | |
- left_cost[i] = l_cost; | |
- right_cost[i] = r_cost; | |
- } | |
- } | |
- } | |
- } | |
-}; | |
- | |
-void print_schedule(const MergeSchedule & sched, int n, std::string indent) { | |
- if (n == 1) { | |
- std::cerr << "1\n"; | |
- return; | |
- } | |
- std::cerr << n << "\t"; | |
- int l = sched.left_size(n); | |
- print_schedule(sched, l, indent + ":\t"); | |
- std::cerr << indent; | |
- print_schedule(sched, n-l, indent + "\t"); | |
-} | |
- | |
-void print_schedule(const MergeSchedule & sched, int n) { | |
- std::string intend = "\t"; | |
- print_schedule(sched, n, intend); | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_MERGE_SCHEDULE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/pagearray.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/pagearray.h | |
deleted file mode 100644 | |
index 3c8fa829..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/pagearray.h | |
+++ /dev/null | |
@@ -1,234 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/pagearray.h | |
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * The paged array representation, as described in Appending B of | |
- * | |
- * Juha Karkkainen, Peter Sanders, Stefan Burkhardt: | |
- * Linear work suffix array construction. | |
- * J. ACM 53(6), p. 918-936 (2006). | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PAGEARRAY_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PAGEARRAY_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <vector> | |
-#include <stack> | |
-#include <algorithm> | |
-#include <thread> | |
-#include <mutex> | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename T, unsigned k_pagesize_log = 12U> | |
-struct pagearray { | |
- static const unsigned pagesize_log = k_pagesize_log; | |
- static const unsigned pagesize = (1U << k_pagesize_log); | |
- static const unsigned pagesize_mask = (1U << k_pagesize_log) - 1; | |
- | |
- typedef T value_type; | |
- typedef pagearray<value_type, k_pagesize_log> pagearray_type; | |
- | |
- long m_length; | |
- long m_shift; | |
- | |
- value_type *m_origin; | |
- value_type **m_pageindex; | |
- | |
- // Initialize empty page array, possible it will be | |
- // a result of merging two page arrays. | |
- pagearray(value_type *origin, long length) { | |
- m_length = length; | |
- m_origin = origin; | |
- m_shift = (pagesize - m_length % pagesize) % pagesize; | |
- | |
- long n_pages = (m_length + pagesize - 1) / pagesize; | |
- m_pageindex = new value_type*[n_pages + 1]; | |
- } | |
- | |
- // Build page array from plain array. | |
- pagearray(value_type *begin, value_type *end) { | |
- m_length = end - begin; | |
- m_origin = begin; | |
- m_shift = (pagesize - m_length % pagesize) % pagesize; | |
- | |
- long n_pages = (m_length + pagesize - 1) / pagesize; | |
- m_pageindex = new value_type*[n_pages + 1]; | |
- for (long i = 0; i < n_pages; ++i) | |
- m_pageindex[i] = begin + i * pagesize - m_shift; | |
- } | |
- | |
- inline value_type &operator[] (long i) const { | |
- i += m_shift; | |
- return m_pageindex[i >> pagesize_log][i & pagesize_mask]; | |
- } | |
- | |
- inline long get_page_offset(long i) const { | |
- i += m_shift; | |
- return (i & pagesize_mask); | |
- } | |
- | |
- inline long get_page_id(long i) const { | |
- i += m_shift; | |
- return (i >> pagesize_log); | |
- } | |
- | |
- inline long get_page_id(value_type *p) const { | |
- p += m_shift; | |
- return ((p - m_origin) >> pagesize_log); | |
- } | |
- | |
- inline bool owns_page(value_type *p) const { | |
- p += m_shift; | |
- return m_origin <= p && p < m_origin + m_length; | |
- } | |
- | |
- inline value_type *get_page_addr(long id) const { | |
- return m_origin + (id << pagesize_log) - m_shift; | |
- } | |
- | |
- inline bool fully_contained_page(value_type *p) const { | |
- p += m_shift; | |
- return (m_origin <= p && p + pagesize <= m_origin + m_length); | |
- } | |
- | |
- // Used only for testing. | |
- void random_shuffle() { | |
- long trimmed_length = m_length - m_length % pagesize; | |
- long n_full_pages = (trimmed_length / pagesize); | |
- for (long t = 0; t < 2 * n_full_pages; ++t) { | |
- long i = rand() % n_full_pages; | |
- long j = rand() % n_full_pages; | |
- | |
- // Swap the page content. | |
- for (long tt = 0; tt < pagesize; ++tt) | |
- std::swap(m_pageindex[i][tt], m_pageindex[j][tt]); | |
- | |
- // Update page index. | |
- std::swap(m_pageindex[i], m_pageindex[j]); | |
- } | |
- } | |
- | |
- ~pagearray() { | |
- if (m_pageindex) | |
- delete[] m_pageindex; | |
- } | |
- | |
- static void permute_to_plain_array_aux(pagearray_type &a, | |
- std::mutex *mutexes, long &selector, std::mutex &selector_mutex) { | |
- long n_pages = (a.m_length + pagesize - 1) / pagesize; | |
- | |
- // Invariant: at all times, index[i] for any i points | |
- // to content that should be placed at i-th page of tab. | |
- while (true) { | |
- // Find starting point on some cycle. | |
- long start; | |
- while (true) { | |
- // Get the candidate using selector. | |
- std::unique_lock<std::mutex> lk(selector_mutex); | |
- while (selector < n_pages && a.m_pageindex[selector] == a.get_page_addr(selector)) | |
- ++selector; | |
- | |
- // Exit, if the selector does not give any candidate. | |
- if (selector == n_pages) { | |
- lk.unlock(); | |
- return; | |
- } | |
- | |
- // Unlock selector lock, allow other threads | |
- // to look for candidates in the meantime. | |
- start = selector++; | |
- lk.unlock(); | |
- | |
- // Lock a candidate page and check if it's still good. | |
- // If yes, keep lock and proceed to process it. | |
- if (mutexes[start].try_lock() && a.m_pageindex[start] != a.get_page_addr(start)) break; | |
- } | |
- | |
- // Invariant: we have found a good candidate | |
- // page and have lock on mutexes[start]. | |
- | |
- // First, we create temporary space for the | |
- // content of page at index[start] and move | |
- // the content at index[start] to that temp space. | |
- value_type *temp = new value_type[pagesize]; | |
- std::copy(a.m_pageindex[start], a.m_pageindex[start] + pagesize, temp); | |
- std::swap(a.m_pageindex[start], temp); | |
- mutexes[start].unlock(); | |
- | |
- // We now have free space at temp. Keep placing there | |
- // elements from the cycle and moving temp pointer. | |
- do { | |
- // Invariant: temp points to a page inside tab. | |
- long next = a.get_page_id(temp); | |
- std::unique_lock<std::mutex> lk(mutexes[next]); | |
- std::copy(a.m_pageindex[next], a.m_pageindex[next] + pagesize, temp); | |
- std::swap(a.m_pageindex[next], temp); | |
- lk.unlock(); | |
- } while (a.owns_page(temp)); | |
- delete[] temp; | |
- } | |
- } | |
- | |
- void permute_to_plain_array(long max_threads) { | |
- long n_pages = (m_length + pagesize - 1) / pagesize; | |
- long selector = 0; | |
- | |
- std::mutex selector_mutex; | |
- std::mutex *mutexes = new std::mutex[n_pages]; | |
- std::thread **threads = new std::thread*[max_threads]; | |
- | |
- for (long i = 0; i < max_threads; ++i) | |
- threads[i] = new std::thread(permute_to_plain_array_aux, | |
- std::ref(*this), mutexes, std::ref(selector), std::ref(selector_mutex)); | |
- | |
- for (long i = 0; i < max_threads; ++i) threads[i]->join(); | |
- for (long i = 0; i < max_threads; ++i) delete threads[i]; | |
- delete[] threads; | |
- delete[] mutexes; | |
- delete[] m_pageindex; | |
- m_pageindex = NULL; | |
- } | |
-}; | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PAGEARRAY_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_copy.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_copy.h | |
deleted file mode 100644 | |
index f6060750..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_copy.h | |
+++ /dev/null | |
@@ -1,136 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/parallel_copy.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_COPY_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_COPY_H_INCLUDED | |
- | |
-#include <algorithm> | |
-#include <thread> | |
- | |
-#include "../uint40.h" | |
-#include "bwtsa.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename T, typename S> | |
-void parallel_copy_aux(const T *src, S *dest, long length) { | |
- for (long i = 0; i < length; ++i) | |
- dest[i] = (S)src[i]; | |
-} | |
- | |
-// Specilization | |
-template<> | |
-void parallel_copy_aux(const bwtsa_t<uint40> *src, unsigned char *dest, long length) { | |
- for (long i = 0; i < length; ++i) | |
- dest[i] = src[i].bwt; | |
-} | |
- | |
-// Specilization | |
-template<> | |
-void parallel_copy_aux(const bwtsa_t<int> *src, unsigned char *dest, long length) { | |
- for (long i = 0; i < length; ++i) | |
- dest[i] = src[i].bwt; | |
-} | |
- | |
- | |
-// Conversion from T to S has to make sense. | |
-template<typename T, typename S> | |
-void parallel_copy(const T *src, S *dest, long length, long max_threads) { | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_blocks = (length + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(parallel_copy_aux<T, S>, | |
- src + block_beg, dest + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
-} | |
- | |
-// Specialization | |
-template<> | |
-void parallel_copy(const bwtsa_t<uint40> *src, unsigned char *dest, long length, long max_threads) { | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_blocks = (length + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(parallel_copy_aux<bwtsa_t<uint40>, unsigned char>, | |
- src + block_beg, dest + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
-} | |
- | |
-// Specialization | |
-template<> | |
-void parallel_copy(const bwtsa_t<int> *src, unsigned char *dest, long length, long max_threads) { | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_blocks = (length + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(parallel_copy_aux<bwtsa_t<int>, unsigned char>, | |
- src + block_beg, dest + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_expand.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_expand.h | |
deleted file mode 100644 | |
index 6850eb4d..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_expand.h | |
+++ /dev/null | |
@@ -1,109 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/parallel_expand.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_EXPAND_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_EXPAND_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <algorithm> | |
-#include <thread> | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename T, typename S> | |
-void parallel_expand_aux(const T *src, S *dest, long length) { | |
- for (long i = 0; i < length; ++i) | |
- dest[i] = (S)src[i]; | |
-} | |
- | |
-// Requires sizeof(T) < sizeof(S). | |
-template<typename T, typename S> | |
-S *parallel_expand(T *tab, long length, long max_threads) { | |
- S *result = (S *)tab; | |
- | |
- long diff = (long)sizeof(S) - (long)sizeof(T); | |
- if (!diff) { | |
- fprintf(stderr, "Error: expanding requires sizeof(T) < sizeof(S)\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- if (length < (1L << 20)) { | |
- // Move the elelements sequentially. | |
- for (long i = length - 1; i >= 0; --i) | |
- result[i] = (S)tab[i]; | |
- | |
- return result; | |
- } | |
- | |
- // Compute the index of the smallest element (of type T) | |
- // that lies past the end of the last element of tab | |
- // (after converting all elements to type S). | |
- long bytes_before_expanding = length * sizeof(T); | |
- long split = (bytes_before_expanding + sizeof(S) - 1) / sizeof(S); | |
- | |
- // Move the elements in the range [split, length) in parallel. | |
- // This is safe (no element overwriting) because of how we | |
- // computed the split. | |
- long elems = length - split; | |
- long max_block_size = (elems + max_threads - 1) / max_threads; | |
- long n_blocks = (elems + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = split + i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(parallel_expand_aux<T, S>, | |
- tab + block_beg, result + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- // Recursively expand the first split elements. | |
- parallel_expand<T, S>(tab, split, max_threads); | |
- | |
- return result; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_merge.h | |
deleted file mode 100644 | |
index c1c4a666..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_merge.h | |
+++ /dev/null | |
@@ -1,290 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/parallel_merge.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * Parallel version of almost in-place stable merging described in | |
- * the Appending B of | |
- * | |
- * Juha Karkkainen, Peter Sanders, Stefan Burkhardt: | |
- * Linear work suffix array construction. | |
- * J. ACM 53(6), p. 918-936 (2006). | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_MERGE_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_MERGE_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <vector> | |
-#include <stack> | |
-#include <algorithm> | |
-#include <thread> | |
-#include <mutex> | |
- | |
-#include "../utils.h" | |
-#include "pagearray.h" | |
-#include "inmem_gap_array.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-//============================================================================== | |
-// Compute the range [res_beg..res_beg+res_size) of the output (i.e., the | |
-// sequence after merging). The range is guaranteed to be aligned with page | |
-// boundaries. | |
-//============================================================================== | |
-template<typename pagearray_type> | |
-void parallel_merge_aux( | |
- const pagearray_type *l_pagearray, | |
- const pagearray_type *r_pagearray, | |
- pagearray_type *output, | |
- const inmem_gap_array *gap, | |
- long left_idx, long right_idx, | |
- long remaining_gap, | |
- long page_range_beg, | |
- long page_range_end, | |
- long what_to_add) { | |
- | |
- typedef typename pagearray_type::value_type value_type; | |
- static const unsigned pagesize = pagearray_type::pagesize; | |
- | |
- long res_beg = std::max(0L, output->get_page_addr(page_range_beg) - output->m_origin); | |
- long res_end = std::min(output->m_length, output->get_page_addr(page_range_end) - output->m_origin); | |
- long res_size = res_end - res_beg; | |
- | |
- long lpage_read = 0L; | |
- long lpage_id = l_pagearray->get_page_id(left_idx); | |
- long lpage_offset = l_pagearray->get_page_offset(left_idx); | |
- value_type *lpage = l_pagearray->m_pageindex[lpage_id++]; | |
- | |
- long rpage_read = 0L; | |
- long rpage_id = r_pagearray->get_page_id(right_idx); | |
- long rpage_offset = r_pagearray->get_page_offset(right_idx); | |
- value_type *rpage = r_pagearray->m_pageindex[rpage_id++]; | |
- | |
- long pageid = output->get_page_id(res_beg); | |
- long filled = output->get_page_offset(res_beg); | |
- value_type *dest = new value_type[pagesize]; | |
- output->m_pageindex[pageid++] = dest; | |
- | |
- std::stack<value_type*> freepages; | |
- size_t excess_ptr = std::lower_bound(gap->m_excess.begin(), | |
- gap->m_excess.end(), left_idx + 1) - gap->m_excess.begin(); | |
- | |
- for (long i = 0; i < res_size; ++i) { | |
- if (filled == pagesize) { | |
- if (freepages.empty()) dest = new value_type[pagesize]; | |
- else { dest = freepages.top(); freepages.pop(); } | |
- output->m_pageindex[pageid++] = dest; | |
- filled = 0L; | |
- } | |
- if (remaining_gap > 0) { | |
- --remaining_gap; | |
- // The next element comes from the right subarray. | |
- dest[filled] = rpage[rpage_offset++]; | |
- dest[filled++].sa += what_to_add; | |
- rpage_read++; | |
- if (rpage_offset == pagesize) { | |
- // We reached the end of page in the right subarray. | |
- // We put it into free pages if we read exactly | |
- // pagesize elements from it. This means the no other | |
- // thread will attemp to read from it in the future. | |
- if (rpage_read == pagesize) freepages.push(r_pagearray->m_pageindex[rpage_id - 1]); | |
- | |
- // Note: we don't have to check, if the page below exists, because we have | |
- // a sentinel page in the page index of every pagearray. | |
- rpage = r_pagearray->m_pageindex[rpage_id++]; | |
- rpage_offset = 0L; | |
- rpage_read = 0L; | |
- } | |
- } else { | |
- // Next elem comes from the left subarray. | |
- dest[filled++] = lpage[lpage_offset++]; | |
- left_idx++; | |
- lpage_read++; | |
- | |
- // Compute gap[left_idx]. | |
- long gap_left_idx = gap->m_count[left_idx]; | |
- while (excess_ptr < gap->m_excess.size() && | |
- gap->m_excess[excess_ptr] == left_idx) { | |
- gap_left_idx += 256L; | |
- ++excess_ptr; | |
- } | |
- | |
- remaining_gap = gap_left_idx; | |
- if (lpage_offset == pagesize) { | |
- // We reached the end of page in the left | |
- // subarray, proceed analogously. | |
- if (lpage_read == pagesize) freepages.push(l_pagearray->m_pageindex[lpage_id - 1]); | |
- | |
- // Note: we don't have to check, if the page below exists, because we have | |
- // a sentinel page in the page index of every pagearray. | |
- lpage = l_pagearray->m_pageindex[lpage_id++]; | |
- lpage_offset = 0L; | |
- lpage_read = 0L; | |
- } | |
- } | |
- } | |
- | |
- // Release the unused auxiliary pages. | |
- while (!freepages.empty()) { | |
- value_type* p = freepages.top(); | |
- freepages.pop(); | |
- if (!output->owns_page(p)) | |
- delete[] p; | |
- } | |
-} | |
- | |
-template<typename pagearray_type> | |
-pagearray_type *parallel_merge(pagearray_type *l_pagearray, | |
- pagearray_type *r_pagearray, const inmem_gap_array *gap, long max_threads, | |
- long i0, long &aux_result, long what_to_add) { | |
- static const unsigned pagesize_log = pagearray_type::pagesize_log; | |
- static const unsigned pagesize = pagearray_type::pagesize; | |
- typedef typename pagearray_type::value_type value_type; | |
- typedef pagearray<value_type, pagesize_log> output_type; | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: compute the initial parameters for each thread. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, "queries: "); | |
- long double start = utils::wclock(); | |
- long length = l_pagearray->m_length + r_pagearray->m_length; | |
- long n_pages = (length + pagesize - 1) / pagesize; | |
- long pages_per_thread = (n_pages + max_threads - 1) / max_threads; | |
- long n_threads = (n_pages + pages_per_thread - 1) / pages_per_thread; | |
- output_type *result = new output_type(l_pagearray->m_origin, length); | |
- | |
- long *left_idx = new long[n_threads]; | |
- long *right_idx = new long[n_threads]; | |
- long *remaining_gap = new long[n_threads]; | |
- | |
- // Prepare gap queries. | |
- long *gap_query = new long[n_threads]; | |
- long *gap_answer_a = new long[n_threads]; | |
- long *gap_answer_b = new long[n_threads]; | |
- for (long i = 0; i < n_threads; ++i) { | |
- long page_range_beg = i * pages_per_thread; | |
- long res_beg = std::max(0L, result->get_page_addr(page_range_beg) - result->m_origin); | |
- gap_query[i] = res_beg; | |
- } | |
- | |
- // Answer these queries in parallel and convert the answers | |
- // to left_idx, right_idx and remaining_gap values. | |
- aux_result = gap->answer_queries(n_threads, gap_query, gap_answer_a, gap_answer_b, max_threads, i0); | |
- for (long i = 0; i < n_threads; ++i) { | |
- long page_range_beg = i * pages_per_thread; | |
- long res_beg = std::max(0L, result->get_page_addr(page_range_beg) - result->m_origin); | |
- long j = gap_answer_a[i], s = gap_answer_b[i]; | |
- left_idx[i] = j; | |
- right_idx[i] = res_beg - j; | |
- remaining_gap[i] = j + s - res_beg; | |
- } | |
- delete[] gap_query; | |
- delete[] gap_answer_a; | |
- delete[] gap_answer_b; | |
- fprintf(stderr, "%.2Lf ", utils::wclock() - start); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: merge the arrays. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, "merge: "); | |
- start = utils::wclock(); | |
- | |
- std::thread **threads = new std::thread*[n_threads]; | |
- for (long t = 0; t < n_threads; ++t) { | |
- long page_range_beg = t * pages_per_thread; | |
- long page_range_end = std::min(page_range_beg + pages_per_thread, n_pages); | |
- | |
- threads[t] = new std::thread(parallel_merge_aux<pagearray_type>, | |
- l_pagearray, r_pagearray, result, gap, left_idx[t], right_idx[t], | |
- remaining_gap[t], page_range_beg, page_range_end, what_to_add); | |
- } | |
- for (long t = 0; t < n_threads; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_threads; ++t) delete threads[t]; | |
- delete[] threads; | |
- delete[] left_idx; | |
- delete[] right_idx; | |
- delete[] remaining_gap; | |
- | |
- bool *usedpage = new bool[n_pages]; | |
- std::fill(usedpage, usedpage + n_pages, false); | |
- | |
- // Handle the page that was not full | |
- // manually (if there was one). | |
- if (length % pagesize) { | |
- long size = length % pagesize; | |
- value_type *src = result->m_pageindex[0]; | |
- value_type *dest = result->get_page_addr(0); | |
- std::copy(src + pagesize - size, src + pagesize, dest + pagesize - size); | |
- result->m_pageindex[0] = dest; | |
- usedpage[0] = true; | |
- | |
- // Release the lastpage if it was temporary. | |
- if (!result->owns_page(src)) | |
- delete[] src; | |
- } | |
- | |
- // Find unused input pages. | |
- std::vector<std::pair<long, value_type*> > auxpages; | |
- for (long i = 0; i < n_pages; ++i) { | |
- value_type *p = result->m_pageindex[i]; | |
- if (result->owns_page(p)) usedpage[result->get_page_id(p)] = true; | |
- else auxpages.push_back(std::make_pair(i, p)); | |
- } | |
- | |
- // Assign aux pages to unused pages in any | |
- // order and release them (aux pages). | |
- for (long i = 0, ptr = 0; i < n_pages; ++i) { | |
- if (!usedpage[i]) { | |
- long id = auxpages[ptr].first; | |
- value_type *src = auxpages[ptr++].second; | |
- value_type *dest = result->get_page_addr(i); | |
- std::copy(src, src + pagesize, dest); | |
- result->m_pageindex[id] = dest; | |
- delete[] src; | |
- } | |
- } | |
- delete[] usedpage; | |
- fprintf(stderr, "%.2Lf ", utils::wclock() - start); | |
- | |
- return result; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_MERGE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_shrink.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_shrink.h | |
deleted file mode 100644 | |
index 0283c600..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_shrink.h | |
+++ /dev/null | |
@@ -1,109 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/parallel_shrink.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED | |
- | |
-#include <algorithm> | |
-#include <thread> | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename T, typename S> | |
-void parallel_shrink_aux(T *src, S *dest, long length) { | |
- for (long i = 0; i < length; ++i) | |
- dest[i] = (S)src[i]; | |
-} | |
- | |
- | |
-// Requires sizeof(T) > sizeof(S). | |
-template<typename T, typename S> | |
-S *parallel_shrink(T *tab, long length, long max_threads) { | |
- S *result = (S *)tab; | |
- | |
- long diff = (long)sizeof(T) - (long)sizeof(S); | |
- if (!diff) { | |
- fprintf(stderr, "Error: shrinking requires sizeof(T) > sizeof(S)\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // long threshold = (sizeof(T) + diff - 1) / diff; | |
- if (length < (1L << 20)/*threshold*/) { | |
- // Move the elelements sequentially. | |
- for (long i = 0; i < length; ++i) | |
- result[i] = (S)tab[i]; | |
- | |
- return result; | |
- } | |
- | |
- // Compute the index of the smallest element (of type T) | |
- // that lies past the end of the last element of tab | |
- // (after converting all elemeents to type S). | |
- long bytes_after_shrinking = length * sizeof(S); | |
- long split = (bytes_after_shrinking + sizeof(T) - 1) / sizeof(T); | |
- | |
- // Recursively shrink the part up to (but excluding) split. | |
- parallel_shrink<T, S>(tab, split, max_threads); | |
- | |
- // Move the elements in the range [split, length) in parallel. | |
- // This is safe (no element overwriting) because of how we | |
- // computed the split. | |
- long elems = length - split; | |
- long max_block_size = (elems + max_threads - 1) / max_threads; | |
- long n_blocks = (elems + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = split + i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(parallel_shrink_aux<T, S>, | |
- tab + block_beg, result + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- return result; | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/rank.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/rank.h | |
deleted file mode 100644 | |
index 486eb8e5..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/rank.h | |
+++ /dev/null | |
@@ -1,798 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/rank.h | |
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * A general rank data structure. Basic idea of the encoding is from | |
- * the rank data structure used in the external-memory algorithm for | |
- * constructing the Burrows-Wheeler transform called bwtdisk (available | |
- * at: http://people.unipmn.it/manzini/bwtdisk/) described in [1]. We | |
- * extended the data structure by applying the fixed block boosting [2] | |
- * and alphabet partitioning [3] techniques. The resulting data structure | |
- * was described in [4]. This file extends the implementation used in [4] | |
- * by parallelizing the construction and introducting an alternative | |
- * encoding (called type-I in the code). Type-I encoding is a novel | |
- * encoding due to present authors. | |
- * | |
- * References: | |
- * [1] Paolo Ferragina, Travis Gagie, Giovanni Manzini: | |
- * Lightweight Data Indexing and Compression in External Memory. | |
- * Algorithmica 63(3), p. 707-730 (2012). | |
- * [2] Juha Karkkainen, Simon J. Puglisi: | |
- * Fixed Block Compression Boosting in FM-Indexes. | |
- * In Proc. SPIRE 2011, p. 174-184. | |
- * [3] Jeremy Barbay, Travis Gagie, Gonzalo Navarro, Yakov Nekrich: | |
- * Alphabet Partitioning for Compressed Rank/Select and Applications. | |
- * In Proc. ISAAC 2010, p. 315-326. | |
- * [4] Juha Karkkainen, Dominik Kempa: | |
- * Engineering a Lightweight External Memory Suffix Array Construction | |
- * Algorithm. | |
- * In Proc. ICABD 2014, p. 53-60. | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_RANK_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_RANK_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <algorithm> | |
-#include <vector> | |
-#include <thread> | |
- | |
-#include "../utils.h" | |
-#include "bwtsa.h" | |
-#include "pagearray.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template< | |
- typename saidx_t, | |
- unsigned pagesize_log, | |
- unsigned k_sblock_size_log = 24, | |
- unsigned k_cblock_size_log = 20, | |
- unsigned k_sigma_log = 8> | |
-class rank4n { | |
- private: | |
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_type; | |
- | |
- static const unsigned long k_cblock_size; | |
- static const unsigned long k_cblock_size_mask; | |
- static const unsigned long k_cblock_size_mask_neg; | |
- static const unsigned k_cblocks_in_sblock_log; | |
- static const unsigned k_cblocks_in_sblock; | |
- static const unsigned k_cblocks_in_sblock_mask; | |
- static const unsigned k_2cblock_size; | |
- static const unsigned k_2cblock_size_mask; | |
- static const unsigned k_sblock_size; | |
- static const unsigned k_sblock_size_mask; | |
- static const unsigned k_sigma; | |
- static const unsigned k_sigma_mask; | |
- | |
- static const unsigned pagesize = (1U << pagesize_log); | |
- static const unsigned pagesize_mask = (1U << pagesize_log) - 1; | |
- | |
- static const unsigned k_char_type_freq = 0x01; | |
- static const unsigned k_char_type_rare = 0x02; | |
- static const unsigned k_char_type_missing = 0x03; | |
- | |
- unsigned long m_length; // length of original sequence | |
- unsigned long n_cblocks; // number of context blocks | |
- unsigned long n_sblocks; // number of super blocks | |
- | |
- unsigned long *m_sblock_header; | |
- unsigned long *m_cblock_header; | |
- unsigned long *m_cblock_header2; | |
- | |
- unsigned char *m_cblock_type; | |
- unsigned char *m_cblock_mapping; | |
- | |
- unsigned *m_freq_trunk; | |
- unsigned *m_rare_trunk; | |
- | |
- public: | |
- unsigned long *m_count; // symbol counts | |
- | |
- public: | |
- rank4n(const pagearray_type *ptext, unsigned long length, unsigned max_threads) { | |
- m_length = length; | |
- n_cblocks = (m_length + k_cblock_size - 1) / k_cblock_size; | |
- n_sblocks = (n_cblocks + k_cblocks_in_sblock - 1) / k_cblocks_in_sblock; | |
- | |
- m_count = (unsigned long *)malloc(256L * sizeof(unsigned long)); | |
- std::fill(m_count, m_count + 256, 0UL); | |
- if (!m_length) return; | |
- | |
- long double start = utils::wclock(); | |
- m_sblock_header = (unsigned long *)malloc(n_sblocks * sizeof(unsigned long) * k_sigma); | |
- m_cblock_header = (unsigned long *)malloc(n_cblocks * sizeof(unsigned long)); | |
- m_cblock_header2 = (unsigned long *)malloc(n_cblocks * k_sigma * sizeof(unsigned long)); | |
- m_cblock_mapping = (unsigned char *)malloc(n_cblocks * k_sigma * 2); | |
- m_cblock_type = (unsigned char *)malloc((n_cblocks + 7) / 8); | |
- m_freq_trunk = (unsigned *)calloc(n_cblocks * k_cblock_size, sizeof(unsigned)); | |
- std::fill(m_cblock_type, m_cblock_type + (n_cblocks + 7) / 8, 0); | |
- unsigned char *bwt = (unsigned char *)malloc(length + k_cblock_size); | |
- long double alloc_time = utils::wclock() - start; | |
- if (alloc_time > 0.05L) | |
- fprintf(stderr, "alloc: %.2Lf ", alloc_time); | |
- | |
- encode_type_I(ptext, bwt, max_threads); | |
- encode_type_II(bwt, max_threads); | |
- | |
- m_count[0] -= n_cblocks * k_cblock_size - m_length; // remove extra zeros | |
- free(bwt); | |
- } | |
- | |
- void encode_type_I(const pagearray_type *ptext, unsigned char *bwt, | |
- long max_threads) { | |
- //------------------------------------------------------------------------ | |
- // STEP 1: split all cblocks into equal size ranges (except possible the | |
- // last one). Each range is processed by one thread. During this | |
- // step we compute: (i) type of each cblock, (ii) encode all | |
- // type-I cblocks and for all type-II cblocks, we compute and | |
- // store: symbol mapping, symbol type (freq / rare / non-occurring) | |
- // and values of freq_cnt_log and rare_cnt_log. | |
- //------------------------------------------------------------------------ | |
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads; | |
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size; | |
- | |
- unsigned long *rare_trunk_size = new unsigned long[n_cblocks]; | |
- std::fill(rare_trunk_size, rare_trunk_size + n_cblocks, 0); | |
- | |
- bool *cblock_type = new bool[n_cblocks]; | |
- std::fill(cblock_type, cblock_type + n_cblocks, 0); | |
- | |
- unsigned **occ = (unsigned **)malloc(n_ranges * sizeof(unsigned *)); | |
- for (unsigned long i = 0; i < n_ranges; ++i) | |
- occ[i] = (unsigned *)malloc((k_cblock_size + 1) * sizeof(unsigned)); | |
- | |
- fprintf(stderr, "s1: "); | |
- long double start = utils::wclock(); | |
- std::thread **threads = new std::thread*[n_ranges]; | |
- for (unsigned long i = 0; i < n_ranges; ++i) { | |
- unsigned long range_beg = i * range_size; | |
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks); | |
- | |
- threads[i] = new std::thread(encode_type_I_aux, std::ref(*this), | |
- ptext, range_beg, range_end, rare_trunk_size, cblock_type, occ[i], bwt); | |
- } | |
- | |
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join(); | |
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- for (unsigned long i = 0; i < n_ranges; ++i) | |
- free(occ[i]); | |
- free(occ); | |
- | |
- fprintf(stderr, "%.2Lf ", utils::wclock() - start); | |
- | |
- | |
- //------------------------------------------------------------------------ | |
- // STEP 2: compute global information based on local cblock computation: | |
- // * store cblock types, | |
- // * total size of rare trunk, | |
- // * pointers to the beginning of each rare trunk, | |
- // * cumulative counts of all symbols, | |
- // * non-inclusive partial sum over cblock range counts. | |
- //------------------------------------------------------------------------ | |
- fprintf(stderr, "s2: "); | |
- start = utils::wclock(); | |
- unsigned long rare_trunk_total_size = 0; | |
- for (unsigned long cblock_id = 0; cblock_id < n_cblocks; ++cblock_id) { | |
- unsigned long cblock_beg = (cblock_id << k_cblock_size_log); | |
- | |
- // 1 | |
- // Store cblock type. | |
- if (cblock_type[cblock_id]) | |
- m_cblock_type[cblock_id >> 3] |= (1 << (cblock_id & 7)); | |
- | |
- // 2 | |
- // Compute the pointer to rare trunk and update total rare trunk size. | |
- unsigned long this_cblock_rare_trunk_size = rare_trunk_size[cblock_id]; | |
- m_cblock_header[cblock_id] |= (rare_trunk_total_size << 16); | |
- rare_trunk_total_size += this_cblock_rare_trunk_size; | |
- | |
- // 3 | |
- // Update cblock header. | |
- unsigned long cblock_header_beg = (cblock_id << k_sigma_log); | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- m_cblock_header2[cblock_header_beg + c] |= (m_count[c] << (k_cblock_size_log + 6)); | |
- | |
- // 4 | |
- // Update sblock header, | |
- if (!(cblock_beg & k_sblock_size_mask)) { | |
- unsigned long sblock_id = (cblock_beg >> k_sblock_size_log); | |
- unsigned long sblock_header_beg = (sblock_id << k_sigma_log); | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- m_sblock_header[sblock_header_beg + c] = m_count[c]; | |
- } | |
- | |
- // 5 | |
- // Update m_count. | |
- unsigned long ptr = (cblock_id << k_sigma_log); | |
- for (unsigned c = 0; c + 1 < k_sigma; ++c) | |
- m_count[c] += ((m_cblock_header2[ptr + c + 1] >> 5) & k_2cblock_size_mask) - | |
- ((m_cblock_header2[ptr + c] >> 5) & k_2cblock_size_mask); | |
- m_count[k_sigma - 1] += k_cblock_size - | |
- ((m_cblock_header2[ptr + k_sigma - 1] >> 5) & k_2cblock_size_mask); | |
- } | |
- m_rare_trunk = (unsigned *)calloc(rare_trunk_total_size, sizeof(unsigned)); | |
- | |
- delete[] cblock_type; | |
- delete[] rare_trunk_size; | |
- | |
- fprintf(stderr, "%.2Lf ", utils::wclock() - start); | |
- } | |
- | |
- static void encode_type_I_aux(rank4n &r, const pagearray_type *ptext, | |
- unsigned long cblock_range_beg, unsigned long cblock_range_end, | |
- unsigned long *rare_trunk_size, bool *cblock_type, unsigned *occ, unsigned char *bwt) { | |
- std::vector<std::pair<uint32_t, unsigned char> > sorted_chars; | |
- std::vector<unsigned char> freq_chars; | |
- std::vector<unsigned char> rare_chars; | |
- | |
- unsigned *refpoint_precomputed = (unsigned *)malloc(k_cblock_size * sizeof(unsigned)); | |
- unsigned *cblock_count = new unsigned[k_sigma]; | |
- unsigned *list_beg = new unsigned[k_sigma]; | |
- unsigned *list_beg2 = new unsigned[k_sigma]; | |
- bool *isfreq = new bool[k_sigma]; | |
- unsigned *lookup_bits_precomputed = new unsigned[k_sigma]; | |
- unsigned *min_block_size_precomputed = new unsigned[k_sigma]; | |
- unsigned long *refpoint_mask_precomputed = new unsigned long[k_sigma]; | |
- | |
- typedef typename pagearray_type::value_type value_type; | |
- | |
- // Process cblocks one by one. | |
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) { | |
- unsigned long cblock_beg = cblock_id << k_cblock_size_log; | |
- unsigned long cblock_end = cblock_beg + k_cblock_size; | |
- | |
- // Compute symbol counts inside cblock and store bwt symbols. | |
- std::fill(cblock_count, cblock_count + k_sigma, 0); | |
- unsigned long maxj = std::min(cblock_end, r.m_length); | |
- unsigned long page_id = (cblock_beg >> pagesize_log); | |
- value_type *cur_page = ptext->m_pageindex[page_id++]; | |
- unsigned long page_offset = ptext->get_page_offset(cblock_beg); | |
- for (unsigned long j = cblock_beg; j < maxj; ++j) { | |
- unsigned char c = cur_page[page_offset].bwt; | |
- bwt[j] = c; | |
- ++cblock_count[c]; | |
- ++page_offset; | |
- if (page_offset == pagesize) { | |
- cur_page = ptext->m_pageindex[page_id]; | |
- ++page_id; | |
- page_offset = 0; | |
- } | |
- } | |
- for (unsigned long j = maxj; j < cblock_end; ++j) { | |
- bwt[j] = 0; | |
- ++cblock_count[0]; | |
- } | |
- | |
- | |
- // Compute starting positions of occurrences lists. | |
- for (unsigned j = 0, t, s = 0; j < k_sigma; ++j) { | |
- t = cblock_count[j]; | |
- list_beg[j] = s; | |
- list_beg2[j] = s; | |
- s += t; | |
- } | |
- | |
- // Store pointers to beginnings of occurrence lists in the type-I | |
- // cblock header. Note: this implicitly encodes cblock counts. | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- r.m_cblock_header2[(cblock_id << k_sigma_log) + c] = (list_beg[c] << 5); | |
- | |
- // Sort symbol counts by frequencies. | |
- sorted_chars.clear(); | |
- for (unsigned j = 0; j < k_sigma; ++j) | |
- if (cblock_count[j]) | |
- sorted_chars.push_back(std::make_pair(cblock_count[j], j)); | |
- std::sort(sorted_chars.begin(), sorted_chars.end()); | |
- | |
- // Separate (at most, due to rounding of freq_cnt) | |
- // about 3% of rarest symbols. | |
- unsigned rare_cnt = 0L, rare_sum = 0L; | |
- while (rare_cnt < sorted_chars.size() && | |
- 16L * (rare_sum + sorted_chars[rare_cnt].first) <= k_cblock_size) | |
- rare_sum += sorted_chars[rare_cnt++].first; | |
- | |
- // Compute freq_cnt. Then round up freq_cnt + 1 (+1 is | |
- // for rare char marker) to the smallest power of two. | |
- // Note: rare_cnt > 0, so after rounding freq_cnt <= 256. | |
- unsigned freq_cnt = sorted_chars.size() - rare_cnt; | |
- unsigned freq_cnt_log = utils::log2ceil(freq_cnt + 1); | |
- freq_cnt = (1 << freq_cnt_log); | |
- | |
- // Recompute rare_cnt (note the +1). | |
- rare_cnt = 0; | |
- if (sorted_chars.size() + 1 > freq_cnt) | |
- rare_cnt = sorted_chars.size() + 1 - freq_cnt; | |
- | |
- // Compute freq and rare chars. | |
- rare_chars.clear(); | |
- freq_chars.clear(); | |
- for (unsigned i = 0; i < rare_cnt; ++i) | |
- rare_chars.push_back(sorted_chars[i].second); | |
- for (unsigned i = rare_cnt; i < sorted_chars.size(); ++i) | |
- freq_chars.push_back(sorted_chars[i].second); | |
- | |
- // If there are rare symbols, round up | |
- // rare_cnt to the smallest power of two. | |
- unsigned rare_cnt_log = 0; | |
- if (rare_cnt) { | |
- rare_cnt_log = utils::log2ceil(rare_cnt); | |
- rare_cnt = (1 << rare_cnt_log); | |
- } | |
- | |
- // Update cblock type-I header. | |
- r.m_cblock_header[cblock_id] = freq_cnt_log; | |
- r.m_cblock_header[cblock_id] |= (rare_cnt_log << 8); | |
- | |
- // Compute and store symbols mapping. | |
- std::sort(freq_chars.begin(), freq_chars.end()); | |
- std::sort(rare_chars.begin(), rare_chars.end()); | |
- std::fill(isfreq, isfreq + 256, false); | |
- for (unsigned c = 0; c < 256; ++c) | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_missing; | |
- for (unsigned i = 0; i < freq_chars.size(); ++i) { | |
- unsigned char c = freq_chars[i]; | |
- isfreq[c] = true; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_freq; | |
- } | |
- for (unsigned i = 0; i < rare_chars.size(); ++i) { | |
- unsigned char c = rare_chars[i]; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_rare; | |
- } | |
- | |
- unsigned nofreq_cnt = 0L; | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- if (!isfreq[c]) nofreq_cnt += cblock_count[c]; | |
- | |
- | |
- if (freq_cnt >= 128) { // type-I cblock | |
- cblock_type[cblock_id] = true; | |
- | |
- // Compute lists of occurrences. | |
- for (unsigned long i = cblock_beg; i < cblock_end; ++i) | |
- occ[list_beg2[bwt[i]]++] = i - cblock_beg; | |
- | |
- // Precompute helper arrays and and store lookup bits into the header. | |
- for (unsigned c = 0; c < k_sigma; ++c) { | |
- lookup_bits_precomputed[c] = utils::log2ceil(cblock_count[c] + 2); | |
- r.m_cblock_header2[(cblock_id << 8) + c] |= lookup_bits_precomputed[c]; | |
- if (cblock_count[c]) | |
- min_block_size_precomputed[c] = k_cblock_size / cblock_count[c]; | |
- else min_block_size_precomputed[c] = 0; | |
- | |
- unsigned refpoint_dist_log = 31 - lookup_bits_precomputed[c]; | |
- unsigned long refpoint_dist = (1UL << refpoint_dist_log); | |
- unsigned long refpoint_dist_mask = refpoint_dist - 1; | |
- unsigned long refpoint_dist_mask_neg = (~refpoint_dist_mask); | |
- refpoint_mask_precomputed[c] = refpoint_dist_mask_neg; | |
- } | |
- | |
- // Actual encoding follows. | |
- unsigned *cblock_trunk = r.m_freq_trunk + cblock_beg; | |
- for (unsigned c = 0; c < k_sigma; ++c) { | |
- unsigned freq = cblock_count[c]; | |
- unsigned min_block_size = min_block_size_precomputed[c]; | |
- unsigned lookup_bits = lookup_bits_precomputed[c]; | |
- unsigned refpoint_dist_mask_neg = refpoint_mask_precomputed[c]; | |
- unsigned c_list_beg = list_beg[c]; | |
- | |
- for (unsigned j = 0; j < freq; ++j) | |
- cblock_trunk[c_list_beg + j] = freq + 1; | |
- if (freq) cblock_trunk[c_list_beg + freq - 1] = freq; | |
- | |
- unsigned block_beg = 0; | |
- for (unsigned j = 0; j < freq; ++j) { | |
- refpoint_precomputed[j] = (block_beg & refpoint_dist_mask_neg); | |
- block_beg += min_block_size; | |
- if ((((unsigned long)block_beg * freq) >> k_cblock_size_log) == j) ++block_beg; | |
- } | |
- | |
- unsigned refpoint, block_id; | |
- unsigned mask = (~((1UL << lookup_bits) - 1)); | |
- if (freq) { | |
- for (long j = freq - 1; j >= 0; --j) { | |
- block_id = (((unsigned long)occ[c_list_beg + j] * freq) >> k_cblock_size_log); | |
- refpoint = refpoint_precomputed[block_id]; | |
- cblock_trunk[c_list_beg + block_id] &= mask; | |
- cblock_trunk[c_list_beg + block_id] |= (unsigned)j; | |
- cblock_trunk[c_list_beg + j] |= ((occ[c_list_beg + j] - refpoint) << lookup_bits); | |
- } | |
- } | |
- } | |
- } else { | |
- // Update rare_trunk_size. | |
- if (rare_cnt) { | |
- long rare_blocks = 1 + (nofreq_cnt + rare_cnt - 1) / rare_cnt; | |
- rare_trunk_size[cblock_id] = rare_blocks * rare_cnt; | |
- } | |
- } | |
- } | |
- | |
- // Clean up. | |
- delete[] list_beg; | |
- delete[] list_beg2; | |
- delete[] isfreq; | |
- delete[] cblock_count; | |
- delete[] lookup_bits_precomputed; | |
- delete[] min_block_size_precomputed; | |
- delete[] refpoint_mask_precomputed; | |
- free(refpoint_precomputed); | |
- } | |
- | |
- void encode_type_II(const unsigned char *bwt, long max_threads) { | |
- fprintf(stderr, "s3: "); | |
- long double start = utils::wclock(); | |
- | |
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads; | |
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size; | |
- | |
- std::thread **threads = new std::thread*[n_ranges]; | |
- for (unsigned long i = 0; i < n_ranges; ++i) { | |
- unsigned long range_beg = i * range_size; | |
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks); | |
- | |
- threads[i] = new std::thread(encode_type_II_aux, | |
- std::ref(*this), range_beg, range_end, bwt); | |
- } | |
- | |
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join(); | |
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- fprintf(stderr, "%.2Lf ", utils::wclock() - start); | |
- } | |
- | |
- static void encode_type_II_aux(rank4n &r, unsigned long cblock_range_beg, | |
- unsigned long cblock_range_end, const unsigned char *bwt) { | |
- unsigned char *freq_map = new unsigned char[k_sigma]; | |
- unsigned char *rare_map = new unsigned char[k_sigma]; | |
- unsigned long *cur_count = new unsigned long[k_sigma]; | |
- unsigned long *off = new unsigned long[k_sigma]; | |
- | |
- long *sblock_h = new long[k_sigma]; | |
- int *israre = new int[k_sigma]; | |
- | |
- std::vector<unsigned char> freq_chars; | |
- std::vector<unsigned char> rare_chars; | |
- | |
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) { | |
- unsigned long cblock_beg = cblock_id << k_cblock_size_log; | |
- unsigned long cblock_end = cblock_beg + k_cblock_size; | |
- | |
- // Skip the cblock if it was type-I encoded. | |
- if (r.m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) continue; | |
- | |
- // Retreive symbol counts up to this cblock begin and | |
- // pointer to rare trunk size from cblock headers. | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- cur_count[c] = (r.m_cblock_header2[(cblock_id << 8) + c] >> (k_cblock_size_log + 6)); | |
- | |
- long r_filled = (r.m_cblock_header[cblock_id] >> 16); | |
- long r_ptr = r_filled; | |
- | |
- long freq_cnt_log = (r.m_cblock_header[cblock_id] & 255L); | |
- long rare_cnt_log = ((r.m_cblock_header[cblock_id] >> 8) & 255L); | |
- long freq_cnt = (1L << freq_cnt_log); | |
- long rare_cnt = (1L << rare_cnt_log); | |
- long rare_cnt_mask = rare_cnt - 1; | |
- | |
- freq_chars.clear(); | |
- rare_chars.clear(); | |
- std::fill(israre, israre + k_sigma, 1); | |
- for (unsigned c = 0; c < k_sigma; ++c) { | |
- unsigned char type = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)]; | |
- if (type == k_char_type_freq) { | |
- israre[c] = 0; | |
- freq_chars.push_back(c); | |
- freq_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1]; | |
- } else if (type == k_char_type_rare) { | |
- rare_chars.push_back(c); | |
- rare_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1]; | |
- freq_map[c] = freq_cnt - 1; | |
- } | |
- } | |
- | |
- if (rare_chars.empty()) { | |
- rare_cnt_log = 0; | |
- rare_cnt = 0; | |
- } | |
- | |
- long sblock_id = (cblock_beg >> k_sblock_size_log); | |
- std::copy(r.m_sblock_header + (sblock_id << 8), r.m_sblock_header + (sblock_id << 8) + k_sigma, sblock_h); | |
- for (long j = 0; j < k_sigma; ++j) off[j] = cur_count[j] - sblock_h[j]; | |
- | |
- long nofreq_cnt = 0; | |
- long freq_chars_size = (long)freq_chars.size(); | |
- long rare_chars_size = (long)rare_chars.size(); | |
- for (unsigned long i = cblock_beg; i < cblock_end; i += freq_cnt) { | |
- for (long j = 0; j < freq_chars_size; ++j) { | |
- unsigned char ch = freq_chars[j]; | |
- r.m_freq_trunk[i + j] = (off[ch] << 8); | |
- } | |
- r.m_freq_trunk[i + freq_cnt - 1] = (nofreq_cnt << 8); | |
- for (unsigned long j = i; j < i + freq_cnt; ++j) { | |
- unsigned char c = bwt[j]; | |
- r.m_freq_trunk[j] |= freq_map[c]; | |
- if (israre[c]) { | |
- if (!(nofreq_cnt & rare_cnt_mask)) { | |
- for (long jj = 0; jj < rare_chars_size; ++jj) { | |
- unsigned char ch = rare_chars[jj]; | |
- r.m_rare_trunk[r_filled++] = (off[ch] << 8); | |
- } | |
- r_filled += rare_cnt - rare_chars_size; | |
- } | |
- r.m_rare_trunk[r_ptr++] |= rare_map[c]; | |
- } | |
- ++off[c]; | |
- nofreq_cnt += israre[c]; | |
- } | |
- } | |
- for (long i = 0; i < k_sigma; ++i) | |
- cur_count[i] = sblock_h[i] + off[i]; | |
- | |
- for (long j = 0; j < rare_cnt; ++j) { | |
- unsigned char ch = (j < (long)rare_chars.size() ? rare_chars[j] : 0); | |
- long local_rank = cur_count[ch] - r.m_sblock_header[(sblock_id << 8) + ch]; | |
- r.m_rare_trunk[r_filled++] = (local_rank << 8); | |
- } | |
- } | |
- | |
- delete[] cur_count; | |
- delete[] sblock_h; | |
- delete[] freq_map; | |
- delete[] rare_map; | |
- delete[] israre; | |
- delete[] off; | |
- } | |
- | |
- inline long rank(long i, unsigned char c) const { | |
- if (i <= 0) return 0L; | |
- else if ((unsigned long)i >= m_length) return m_count[c]; | |
- | |
- unsigned long cblock_id = (i >> k_cblock_size_log); | |
- if (m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) { // type-I cblock | |
- long cblock_beg = (i & k_cblock_size_mask_neg); | |
- long cblock_i = (i & k_cblock_size_mask); // offset in cblock | |
- | |
- // Extract the rank up to the start of cblock. | |
- long rank_up_to_cblock = (m_cblock_header2[(cblock_id << k_sigma_log) + c] >> (k_cblock_size_log + 6)); | |
- | |
- // Now we compute the number of occurrences of c inside the cblock. | |
- // First, decode the beginning and end of c's occurrence list. | |
- long list_beg = ((m_cblock_header2[(cblock_id << k_sigma_log) + c] >> 5) & k_2cblock_size_mask); | |
- long list_end = ((c == k_sigma - 1) ? k_cblock_size : | |
- ((m_cblock_header2[(cblock_id << k_sigma_log) + c + 1] >> 5) & k_2cblock_size_mask)); | |
- if (list_beg == list_end) return rank_up_to_cblock; | |
- | |
- // Compute the distance from i to the closest reference point on the left. | |
- long lookup_bits = (m_cblock_header2[(cblock_id << k_sigma_log) + c] & 31); | |
- long refpoint_dist_log = 31 - lookup_bits; | |
- long refpoint_disk_mask = (1L << refpoint_dist_log) - 1; | |
- long i_refpoint_offset = (cblock_i & refpoint_disk_mask); | |
- | |
- // Compute threshold of symbol c inside the current cblock. | |
- long threshold = (1L << (k_cblock_size_log - lookup_bits + 1)); | |
- | |
- // Compute the id of block containing i. | |
- long list_size = list_end - list_beg; | |
- long approx = ((cblock_i * list_size) >> k_cblock_size_log); | |
- | |
- // Extract the lookup table entry. | |
- long lookup_mask = (1L << lookup_bits) - 1; | |
- long begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask); | |
- | |
- // Empty block optimization. | |
- if (begin == list_size + 1) { | |
- // Block containing cblock_i is empty, just find the beginning. | |
- ++approx; | |
- while ((m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask) == list_size + 1) ++approx; | |
- begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask); | |
- return rank_up_to_cblock + begin; | |
- } | |
- | |
- long next_block_begin = (approx + 1 == list_size) ? list_size : | |
- (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask); | |
- | |
- // Correct next_block_begin. | |
- if (approx + 1 != list_size && next_block_begin == list_size + 1) { | |
- ++approx; | |
- while ((m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask) == list_size + 1) ++approx; | |
- next_block_begin = (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask); | |
- } | |
- | |
- // Correct the value of begin and return the answer. | |
- if (i_refpoint_offset >= threshold) { | |
- // Case 1: easy case, will happen most of the time. | |
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset) | |
- ++begin; | |
- | |
- return rank_up_to_cblock + begin; | |
- } else { | |
- // Case 2: executed very rarely. | |
- if (begin == next_block_begin || (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < (2 * threshold)) { | |
- // Case 2a: the value in the occ list was small -> the ref | |
- // point for i and for the block are the same, we | |
- // proceed as before, without modifying i_refpoint_offset. | |
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset) | |
- ++begin; | |
- | |
- return rank_up_to_cblock + begin; | |
- } else { | |
- // Case 2b: block occurrences were encoded wrt to the | |
- // previous ref point -> we increase i_refpoint_offset | |
- // by refpoint_dist and proceed as before. | |
- i_refpoint_offset += (1L << refpoint_dist_log); | |
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset) | |
- ++begin; | |
- | |
- return rank_up_to_cblock + begin; | |
- } | |
- } | |
- } else { // type-II cblock | |
- long sblock_id = (i >> k_sblock_size_log); | |
- long sblock_rank = m_sblock_header[(sblock_id << 8) + c]; | |
- | |
- unsigned char type = m_cblock_mapping[2 * (c * n_cblocks + cblock_id)]; | |
- unsigned char c_map = m_cblock_mapping[2 * (c * n_cblocks + cblock_id) + 1]; | |
- | |
- long freq_cnt_bits = (m_cblock_header[cblock_id] & 255L); | |
- long rare_cnt_bits = ((m_cblock_header[cblock_id] >> 8) & 255L); | |
- long block_id = (i >> freq_cnt_bits); | |
- | |
- if (type == k_char_type_freq) { | |
- // Case 1 (fastest): symbol c was frequent in the context block. | |
- // Answer a query using frequent trunk. | |
- long block_rank = m_freq_trunk[(block_id << freq_cnt_bits) + c_map] >> 8; | |
- long extra = 0; | |
- for (long j = (block_id << freq_cnt_bits); j < i; ++j) | |
- if ((m_freq_trunk[j] & 255) == c_map) ++extra; | |
- | |
- return sblock_rank + block_rank + extra; | |
- } else if (type == k_char_type_rare) { | |
- // Case 2: symbol c was rare inside the context block. | |
- // Compute new_i. | |
- long rare_trunk_ptr = (m_cblock_header[cblock_id] >> 16); | |
- long new_i = m_freq_trunk[((block_id + 1) << freq_cnt_bits) - 1] >> 8; | |
- for (long j = (block_id << freq_cnt_bits); j < i; ++j) | |
- if ((m_freq_trunk[j] & 255) + 1 == (1U << freq_cnt_bits)) ++new_i; | |
- | |
- // Answer a query on rare trunk. | |
- long rare_block_id = (new_i >> rare_cnt_bits); | |
- long block_rank = m_rare_trunk[rare_trunk_ptr + | |
- (rare_block_id << rare_cnt_bits) + c_map] >> 8; | |
- long extra = 0; | |
- for (long j = (rare_block_id << rare_cnt_bits); j < new_i; ++j) | |
- if ((m_rare_trunk[rare_trunk_ptr + j] & 255) == c_map) ++extra; | |
- | |
- return sblock_rank + block_rank + extra; | |
- } else { | |
- // Case 3: symbol c does not occur in the context block. | |
- // Find the first cblock where c occurrs. | |
- while (cblock_id < n_cblocks && (cblock_id & k_cblocks_in_sblock_mask) && | |
- m_cblock_mapping[2 * (c * n_cblocks + cblock_id)] == k_char_type_missing) | |
- ++cblock_id; | |
- | |
- if (cblock_id == n_cblocks) { | |
- // We reached the end of encoding, return count[c]. | |
- return m_count[c]; | |
- } else if (!(cblock_id & k_cblocks_in_sblock_mask)) { | |
- // We reached the boundary of superblock, | |
- // retreive the answer from superblock header. | |
- return m_sblock_header[256 * (cblock_id >> k_cblocks_in_sblock_log) + c]; | |
- } else { | |
- // We found cblock where c occurrs, but it wasn't on the | |
- // sblock boundary. In the recursive call this will either | |
- // be case 1 or case 2. | |
- return rank(cblock_id << k_cblock_size_log, c); | |
- } | |
- } | |
- } | |
- } | |
- | |
- ~rank4n() { | |
- if (m_length) { | |
- free(m_sblock_header); | |
- free(m_cblock_header); | |
- free(m_cblock_header2); | |
- free(m_cblock_mapping); | |
- free(m_cblock_type); | |
- free(m_freq_trunk); | |
- free(m_rare_trunk); | |
- } | |
- free(m_count); | |
- } | |
-}; | |
- | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned long rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblock_size = (1L << k_cblock_size_log); | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned long rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblock_size_mask = (1L << k_cblock_size_log) - 1; | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_2cblock_size = (2 << k_cblock_size_log); | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_2cblock_size_mask = (2 << k_cblock_size_log) - 1; | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sigma = (1 << k_sigma_log); | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sigma_mask = (1 << k_sigma_log) - 1; | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned long rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblock_size_mask_neg = ~((1L << k_cblock_size_log) - 1); | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblocks_in_sblock_log = k_sblock_size_log - k_cblock_size_log; | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblocks_in_sblock = (1 << (k_sblock_size_log - k_cblock_size_log)); | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblocks_in_sblock_mask = (1 << (k_sblock_size_log - k_cblock_size_log)) - 1; | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sblock_size = (1 << k_sblock_size_log); | |
- | |
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sblock_size_mask = (1 << k_sblock_size_log) - 1; | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_RANK_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/sparse_isa.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/sparse_isa.h | |
deleted file mode 100644 | |
index 68b2076f..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/sparse_isa.h | |
+++ /dev/null | |
@@ -1,161 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/sparse_isa.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * Sparse ISA encoding based on the ISAs algorithm computing | |
- * Lempel-Ziv (LZ77) factorization described in | |
- * | |
- * Dominik Kempa, Simon J. Puglisi: | |
- * Lempel-Ziv factorization: Simple, fast, practical. | |
- * In Proc. ALENEX 2013, p. 103-112. | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_SPARSE_ISA_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_SPARSE_ISA_H_INCLUDED | |
- | |
-#include <algorithm> | |
-#include <thread> | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-template<typename pagearray_type, typename rank_type, unsigned isa_sampling_rate_log> | |
-struct sparse_isa { | |
- static const unsigned isa_sampling_rate = (1U << isa_sampling_rate_log); | |
- static const unsigned isa_sampling_rate_mask = isa_sampling_rate - 1; | |
- static const long k_sigma = 256; | |
- | |
- static void compute_sparse_isa_aux(const pagearray_type &bwtsa, long block_beg, | |
- long block_end, long psa_size, long *sparse_isa, long &last) { | |
- for (long j = block_beg; j < block_end; ++j) { | |
- long sa_j = bwtsa[j].sa; | |
- if (!(sa_j & isa_sampling_rate_mask)) | |
- sparse_isa[sa_j >> isa_sampling_rate_log] = j; | |
- if (sa_j == psa_size - 1) last = j; | |
- } | |
- } | |
- | |
- sparse_isa(const pagearray_type *bwtsa, const unsigned char *text, | |
- const rank_type *rank, long length, long i0, long max_threads) { | |
- m_bwtsa = bwtsa; | |
- m_length = length; | |
- m_rank = rank; | |
- m_text = text; | |
- m_i0 = i0; | |
- | |
- long elems = (m_length + isa_sampling_rate - 1) / isa_sampling_rate + 1; | |
- m_sparse_isa = (long *)malloc(elems * sizeof(long)); | |
- | |
- long max_block_size = (m_length + max_threads - 1) / max_threads; | |
- long n_blocks = (m_length + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long t = 0; t < n_blocks; ++t) { | |
- long block_beg = t * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, m_length); | |
- | |
- threads[t] = new std::thread(compute_sparse_isa_aux, std::ref(*m_bwtsa), | |
- block_beg, block_end, m_length, m_sparse_isa, std::ref(m_last_isa)); | |
- } | |
- | |
- for (long t = 0; t < n_blocks; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_blocks; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- m_count = (long *)malloc(k_sigma * sizeof(long)); | |
- std::copy(rank->m_count, rank->m_count + k_sigma, m_count); | |
- ++m_count[text[length - 1]]; | |
- --m_count[0]; | |
- | |
- for (long i = 0, s = 0; i < k_sigma; ++i) { | |
- long t = m_count[i]; | |
- m_count[i] = s; | |
- s += t; | |
- } | |
- } | |
- | |
- inline long query(long j) const { | |
- long isa_i; | |
- long i = ((j + isa_sampling_rate - 1) >> isa_sampling_rate_log); | |
- if ((i << isa_sampling_rate_log) < m_length) { | |
- isa_i = m_sparse_isa[i]; | |
- i <<= isa_sampling_rate_log; | |
- } else { | |
- isa_i = m_last_isa; | |
- i = m_length - 1; | |
- } | |
- | |
- while (i != j) { | |
- // Compute ISA[i - 1] from ISA[i]. | |
- // Invariant: | |
- // isa_i = ISA[i] | |
- // j <= i | |
- unsigned char c = m_text[i - 1]; | |
- int delta = (isa_i > m_i0 && c == 0); | |
- | |
- isa_i = m_count[c] + m_rank->rank(isa_i, c) - delta; | |
- if (isa_i < 0 || ((long)((*m_bwtsa)[isa_i].sa)) != i - 1) | |
- ++isa_i; | |
- | |
- --i; | |
- } | |
- | |
- return isa_i; | |
- } | |
- | |
- ~sparse_isa() { | |
- free(m_sparse_isa); | |
- free(m_count); | |
- } | |
- | |
- | |
-private: | |
- long m_length; | |
- long m_last_isa; | |
- long m_i0; | |
- | |
- long *m_count; | |
- long *m_sparse_isa; | |
- | |
- const unsigned char *m_text; | |
- const pagearray_type *m_bwtsa; | |
- const rank_type *m_rank; | |
-}; | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_SPARSE_ISA_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/srank_aux.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/srank_aux.h | |
deleted file mode 100644 | |
index 139ad357..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/srank_aux.h | |
+++ /dev/null | |
@@ -1,72 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/inmem_psascan_src/srank_aux.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_SRANK_AUX_H_INCLUDED | |
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_SRANK_AUX_H_INCLUDED | |
- | |
- | |
-namespace psascan_private { | |
-namespace inmem_psascan_private { | |
- | |
-//============================================================================== | |
-// Compute ms-decomposition of text[0..length) from ms-decomposition of | |
-// text[0..length - 1). The result is returned via updated values s, p, r. | |
-//============================================================================== | |
-template<typename T> | |
-inline void update_ms(const unsigned char *text, T length, T &s, T &p) { | |
- if (length == 1) { s = 0; p = 1; return; } | |
- | |
- T i = length - 1; | |
- while (i < length) { | |
- unsigned char a = text[i - p]; | |
- unsigned char b = text[i]; | |
- | |
- if (a > b) p = i - s + 1; | |
- else if (a < b) { | |
- long r = (i - s); | |
- while (r >= p) r -= p; | |
- i -= r; | |
- s = i; | |
- p = 1; | |
- } | |
- | |
- ++i; | |
- } | |
-} | |
- | |
-} // namespace inmem_psascan_private | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_SRANK_AUX_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/merge.h | |
deleted file mode 100644 | |
index 6fdef5e8..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/merge.h | |
+++ /dev/null | |
@@ -1,186 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/merge.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_MERGE_H_INCLUDED | |
-#define __PSASCAN_SRC_MERGE_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cmath> | |
-#include <string> | |
-#include <vector> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
-#include "uint40.h" | |
-#include "distributed_file.h" | |
-#include "half_block_info.h" | |
-#include "async_stream_writer.h" | |
-#include "async_vbyte_stream_reader.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-// Merge partial suffix arrays into final suffix array. | |
-template<typename block_offset_type> | |
-void merge(std::string output_filename, long ram_use, std::vector<half_block_info<block_offset_type> > &hblock_info) { | |
- long n_block = (long)hblock_info.size(); | |
- long text_length = 0; | |
- | |
- std::sort(hblock_info.begin(), hblock_info.end()); | |
- for (size_t j = 0; j < hblock_info.size(); ++j) | |
- text_length += hblock_info[j].end - hblock_info[j].beg; | |
- | |
- long pieces = (1 + sizeof(block_offset_type)) * n_block - 1 + sizeof(uint40); | |
- long buffer_size = (ram_use + pieces - 1) / pieces; | |
- | |
- fprintf(stderr, "\nMerge partial suffix arrays:\n"); | |
- fprintf(stderr, " buffer size per block = %ld (%.2LfMiB)\n", | |
- sizeof(block_offset_type) * buffer_size, | |
- (1.L * sizeof(block_offset_type) * buffer_size) / (1 << 20)); | |
- fprintf(stderr, " sizeof(output_type) = %ld\n", sizeof(uint40)); | |
- | |
- typedef async_vbyte_stream_reader<long> vbyte_reader_type; | |
- typedef async_stream_writer<uint40> output_writer_type; | |
- | |
- output_writer_type *output = new output_writer_type(output_filename, sizeof(uint40) * buffer_size); | |
- vbyte_reader_type **gap = new vbyte_reader_type*[n_block - 1]; | |
- for (long i = 0; i < n_block; ++i) { | |
- hblock_info[i].psa->initialize_reading(sizeof(block_offset_type) * buffer_size); | |
- if (i + 1 != n_block) | |
- gap[i] = new vbyte_reader_type(hblock_info[i].gap_filename, buffer_size); | |
- } | |
- | |
- long *gap_head = new long[n_block]; | |
- for (long i = 0; i + 1 < n_block; ++i) | |
- gap_head[i] = gap[i]->read(); | |
- gap_head[n_block - 1] = 0; | |
- | |
- long tmp = (long)sqrtl((long double)n_block); | |
- long sblock_size = 1L; | |
- long sblock_size_log = 0; | |
- while (sblock_size * 2L <= tmp) { | |
- sblock_size *= 2L; | |
- ++sblock_size_log; | |
- } | |
- | |
- long n_sblocks = (n_block + sblock_size - 1) / sblock_size; | |
- std::pair<long, long> *sblock_info = new std::pair<long, long>[n_sblocks]; | |
- | |
- for (long i = 0; i < n_sblocks; ++i) { | |
- long sblock_beg = i * sblock_size; | |
- long sblock_end = std::min(n_block, sblock_beg + sblock_size); | |
- | |
- sblock_info[i].second = 0; | |
- sblock_info[i].first = gap_head[sblock_beg]; | |
- for (long j = sblock_beg + 1; j < sblock_end; ++j) | |
- sblock_info[i].first = std::min(sblock_info[i].first, gap_head[j]); | |
- } | |
- | |
- long double merge_start = utils::wclock(); | |
- for (long i = 0, dbg = 0; i < text_length; ++i, ++dbg) { | |
- if (dbg == (1 << 23)) { | |
- long double elapsed = utils::wclock() - merge_start; | |
- long inp_vol = (1L + sizeof(block_offset_type)) * i; | |
- long out_vol = sizeof(uint40) * i; | |
- long tot_vol = inp_vol + out_vol; | |
- long double tot_vol_m = tot_vol / (1024.L * 1024); | |
- long double io_speed = tot_vol_m / elapsed; | |
- fprintf(stderr, "\r %.1Lf%%. Time = %.2Lfs. I/O: %2.LfMiB/s", | |
- (100.L * i) / text_length, elapsed, io_speed); | |
- dbg = 0; | |
- } | |
- | |
- // Find the superblock containing gap head equal to zero. | |
- long k = 0; | |
- while (sblock_info[k].first != 0) { | |
- sblock_info[k].first--; | |
- sblock_info[k].second++; | |
- ++k; | |
- } | |
- | |
- // Find the block with the gap head equal to zero. | |
- long sblock_beg = (k << sblock_size_log); | |
- long sblock_end = std::min(n_block, sblock_beg + sblock_size); | |
- | |
- long new_min = text_length; | |
- long j = sblock_beg; | |
- while (gap_head[j] != sblock_info[k].second) { | |
- gap_head[j] -= (sblock_info[k].second + 1); | |
- new_min = std::min(new_min, gap_head[j]); | |
- ++j; | |
- } | |
- | |
- long SA_i = hblock_info[j].psa->read() + hblock_info[j].beg; | |
- | |
- if (j != n_block - 1) gap_head[j] = gap[j]->read(); | |
- new_min = std::min(new_min, gap_head[j]); | |
- ++j; | |
- | |
- while (j < sblock_end) { | |
- gap_head[j] -= sblock_info[k].second; | |
- new_min = std::min(new_min, gap_head[j]); | |
- ++j; | |
- } | |
- | |
- sblock_info[k].first = new_min; | |
- sblock_info[k].second = 0; | |
- | |
- output->write(SA_i); | |
- } | |
- long double merge_time = utils::wclock() - merge_start; | |
- long io_volume = (1 + sizeof(block_offset_type) + sizeof(uint40)) * text_length; | |
- long double io_speed = (io_volume / (1024.L * 1024)) / merge_time; | |
- fprintf(stderr, "\r 100.0%%. Time: %.2Lfs. I/O: %.2LfMiB/s\n", merge_time, io_speed); | |
- | |
- // Clean up. | |
- delete output; | |
- for (long i = 0; i < n_block; ++i) { | |
- hblock_info[i].psa->finish_reading(); | |
- delete hblock_info[i].psa; | |
- if (i + 1 != n_block) | |
- delete gap[i]; | |
- } | |
- | |
- delete[] gap; | |
- delete[] gap_head; | |
- delete[] sblock_info; | |
- | |
- for (int i = 0; i + 1 < n_block; ++i) | |
- utils::file_delete(hblock_info[i].gap_filename); | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_MERGE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/multifile.h b/exttools/pSAscan-0.1.0/src/psascan_src/multifile.h | |
deleted file mode 100644 | |
index d2a5bf72..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/multifile.h | |
+++ /dev/null | |
@@ -1,74 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/multifile.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_MULTIFILE_H_INCLUDED | |
-#define __PSASCAN_SRC_MULTIFILE_H_INCLUDED | |
- | |
-#include <vector> | |
-#include <string> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct single_file_info { | |
- long m_beg; | |
- long m_end; | |
- std::string m_filename; | |
- | |
- single_file_info(long beg, long end, std::string filename) { | |
- m_beg = beg; | |
- m_end = end; | |
- m_filename = filename; | |
- } | |
-}; | |
- | |
-struct multifile { | |
- std::vector<single_file_info> files_info; | |
- | |
- void add_file(long beg, long end, std::string filename) { | |
- files_info.push_back(single_file_info(beg, end, filename)); | |
- } | |
- | |
- ~multifile() { | |
- for (size_t i = 0; i < files_info.size(); ++i) | |
- utils::file_delete(files_info[i].m_filename); | |
- } | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_MULTIFILE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/multifile_bit_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/multifile_bit_stream_reader.h | |
deleted file mode 100644 | |
index 743d67b4..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/multifile_bit_stream_reader.h | |
+++ /dev/null | |
@@ -1,171 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/multifile_bit_stream_reader.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED | |
-#define __PSASCAN_SRC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <vector> | |
- | |
-#include "utils.h" | |
-#include "multifile.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct multifile_bit_stream_reader { | |
-private: | |
- static const long k_bufsize; | |
- | |
- // info for currently accessed file. | |
- std::FILE *m_file; | |
- long m_file_beg; | |
- long m_file_end; | |
- | |
- unsigned char *m_buffer; | |
- long m_offset; // number of the first bit in the buffer | |
- long m_filled; // how many bits we have in a buffer | |
- | |
- long cur_bit_absolute; | |
- long cur_bit_buffer; | |
- long cur_bit; | |
- long cur_byte; | |
- | |
- std::vector<single_file_info> files_info; | |
- | |
-public: | |
- multifile_bit_stream_reader(const multifile *m) { | |
- m_file = NULL; | |
- m_file_beg = 0; | |
- m_file_end = 0; | |
- m_buffer = new unsigned char[k_bufsize]; | |
- | |
- if (m != NULL) | |
- files_info = m->files_info; | |
- } | |
- | |
- // Subsequent access operations are quaranteed | |
- // to be with increasing argument. | |
- bool access(long i) { | |
- if (i < m_file_beg || m_file_end <= i) { | |
- open_file_for_index(i); | |
- i -= m_file_beg; | |
- } else { | |
- i -= m_file_beg; | |
- | |
- if (i < m_offset || m_offset + m_filled <= i) { | |
- refill(i); | |
- } | |
- } | |
- | |
- i -= m_offset; | |
- return (m_buffer[i >> 3] & (1 << (i & 7))); | |
- } | |
- | |
- void initialize_sequential_reading(long i) { | |
- open_file_for_index(i); | |
- | |
- cur_bit_absolute = i; | |
- cur_bit_buffer = cur_bit_absolute - (m_file_beg + m_offset); | |
- cur_byte = (cur_bit_buffer >> 3); | |
- cur_bit = (cur_bit_buffer & 7); | |
- } | |
- | |
- inline bool read() { | |
- if (cur_bit_absolute == m_file_end) open_file_for_index(m_file_end); | |
- if (cur_bit_buffer == m_filled) refill(m_offset + m_filled); | |
- | |
- bool ans = (m_buffer[cur_byte] & (1 << cur_bit)); | |
- ++cur_bit; | |
- if (cur_bit == 8) { | |
- cur_bit = 0; | |
- ++cur_byte; | |
- } | |
- | |
- ++cur_bit_buffer; | |
- ++cur_bit_absolute; | |
- return ans; | |
- } | |
- | |
- ~multifile_bit_stream_reader() { | |
- if (m_file) | |
- std::fclose(m_file); | |
- delete[] m_buffer; | |
- } | |
- | |
-private: | |
- void refill(long offset) { | |
- offset -= (offset & 7); | |
- if (m_offset + m_filled != offset) | |
- std::fseek(m_file, (offset >> 3), SEEK_SET); | |
- long bytes_read = std::fread(m_buffer, 1, k_bufsize, m_file); | |
- m_filled = std::min(m_file_end - offset, 8L * bytes_read); // in bits | |
- m_offset = offset; // in bits | |
- | |
- cur_byte = 0; // in the buffer | |
- cur_bit = 0; // in the current byte | |
- cur_bit_buffer = 0; | |
- } | |
- | |
- void open_file_for_index(long i) { | |
- // Close current file (if any is open). | |
- if (m_file) std::fclose(m_file); | |
- | |
- // First find the right file. | |
- long id = 0; | |
- while (i < files_info[id].m_beg || files_info[id].m_end <= i) | |
- ++id; | |
- | |
- m_file = utils::open_file(files_info[id].m_filename, "r"); | |
- m_file_beg = files_info[id].m_beg; | |
- m_file_end = files_info[id].m_end; | |
- | |
- cur_bit_absolute = m_file_beg; | |
- cur_bit_buffer = 0; | |
- cur_bit = 0; | |
- cur_byte = 0; | |
- | |
- m_offset = 0; | |
- m_filled = 0; | |
- | |
- refill(i - m_file_beg); | |
- } | |
-}; | |
- | |
-const long multifile_bit_stream_reader::k_bufsize = (1L << 20); | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/parallel_utils.h b/exttools/pSAscan-0.1.0/src/psascan_src/parallel_utils.h | |
deleted file mode 100644 | |
index d0daa61f..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/parallel_utils.h | |
+++ /dev/null | |
@@ -1,207 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/parallel_utils.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_PARALLEL_UTILS_H_INCLUDED | |
-#define __PSASCAN_SRC_PARALLEL_UTILS_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <algorithm> | |
- | |
- | |
-namespace psascan_private { | |
-namespace parallel_utils { | |
- | |
-//============================================================================== | |
-// Encode tab[0..length) using vbyte encoding and write to dest sequentially. | |
-//============================================================================== | |
-void encode_vbyte_slab(const long *tab, long length, unsigned char *dest) { | |
- long ptr = 0L; | |
- for (long j = 0; j < length; ++j) { | |
- long x = tab[j]; | |
- while (x > 127) { | |
- dest[ptr++] = ((x & 0x7f) | 0x80); | |
- x >>= 7; | |
- } | |
- dest[ptr++] = x; | |
- } | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Compute the size of vbyte encoding of tab[0..length). | |
-//============================================================================== | |
-void compute_size_of_vbyte_slab(const long *tab, long length, long &result) { | |
- result = 0L; | |
- for (long j = 0; j < length; ++j) { | |
- long x = tab[j]; | |
- while (x > 127) { | |
- ++result; | |
- x >>= 7; | |
- } | |
- ++result; | |
- } | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Encode tab[0..length) using v-byte encoding and write to dest in parallel. | |
-// We assume that dest is sufficiently large to hold the output. | |
-// The function returns the length of the slab. | |
-//============================================================================== | |
-long convert_array_to_vbyte_slab(const long *tab, long length, unsigned char *dest, long max_threads) { | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_blocks = (length + max_block_size - 1) / max_block_size; | |
- | |
- // 1 | |
- // | |
- // Compute the length of slab for each block. | |
- long *block_slab_length = new long[n_blocks]; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long t = 0; t < n_blocks; ++t) { | |
- long block_beg = t * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[t] = new std::thread(compute_size_of_vbyte_slab, | |
- tab + block_beg, block_size, std::ref(block_slab_length[t])); | |
- } | |
- | |
- for (long t = 0; t < n_blocks; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_blocks; ++t) delete threads[t]; | |
- | |
- // 2 | |
- // | |
- // Compute cummulative sum for block slab lengths. | |
- long total_slab_length = 0L; | |
- for (long j = 0; j < n_blocks; ++j) { | |
- long temp = block_slab_length[j]; | |
- block_slab_length[j] = total_slab_length; | |
- total_slab_length += temp; | |
- } | |
- | |
- // 3 | |
- // | |
- // Compute the slabs. Now we know where each slab begins. | |
- for (long t = 0; t < n_blocks; ++t) { | |
- long block_beg = t * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[t] = new std::thread(encode_vbyte_slab, | |
- tab + block_beg, block_size, dest + block_slab_length[t]); | |
- } | |
- | |
- for (long t = 0; t < n_blocks; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_blocks; ++t) delete threads[t]; | |
- delete[] threads; | |
- delete[] block_slab_length; | |
- | |
- return total_slab_length; | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Copy src[0..length) to dest[0..length). | |
-//============================================================================== | |
-template<typename T, typename S> | |
-void parallel_copy_aux(const T *src, S *dest, long length) { | |
- for (long i = 0; i < length; ++i) | |
- dest[i] = (S)src[i]; | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Parallel version of std::copy (with slightly different interface). | |
-// Conversion from T to S has to make sense. | |
-//============================================================================== | |
-template<typename T, typename S> | |
-void parallel_copy(const T *src, S *dest, long length, long max_threads) { | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_blocks = (length + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(parallel_copy_aux<T, S>, | |
- src + block_beg, dest + block_beg, block_size); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Set all values in tab[0..length) to x. | |
-//============================================================================== | |
-template<typename T> | |
-void parallel_fill_aux(T *tab, long length, T x) { | |
- for (long i = 0; i < length; ++i) | |
- tab[i] = x; | |
-} | |
- | |
- | |
-//============================================================================== | |
-// Parallel version of std::fill (with slightly different interface). | |
-//============================================================================== | |
-template<typename T> | |
-void parallel_fill(T *tab, long length, T x, long max_threads) { | |
- long max_block_size = (length + max_threads - 1) / max_threads; | |
- long n_blocks = (length + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long i = 0; i < n_blocks; ++i) { | |
- long block_beg = i * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, length); | |
- long block_size = block_end - block_beg; | |
- | |
- threads[i] = new std::thread(parallel_fill_aux<T>, | |
- tab + block_beg, block_size, x); | |
- } | |
- | |
- for (long i = 0; i < n_blocks; ++i) threads[i]->join(); | |
- for (long i = 0; i < n_blocks; ++i) delete threads[i]; | |
- delete[] threads; | |
-} | |
- | |
-} // namespace parallel_utils | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_PARALLEL_UTILS_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/partial_sufsort.h b/exttools/pSAscan-0.1.0/src/psascan_src/partial_sufsort.h | |
deleted file mode 100644 | |
index 50a49cdd..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/partial_sufsort.h | |
+++ /dev/null | |
@@ -1,590 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/partial_sufsort.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_PARTIAL_SUFSORT_H_INCLUDED | |
-#define __PSASCAN_SRC_PARTIAL_SUFSORT_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstring> | |
-#include <string> | |
-#include <thread> | |
-#include <algorithm> | |
-#include <vector> | |
-#include <sys/stat.h> | |
-#include <fcntl.h> | |
- | |
-#include "inmem_psascan_src/inmem_psascan.h" | |
-#include "utils.h" | |
-#include "rank.h" | |
-#include "gap_array.h" | |
-#include "bitvector.h" | |
-#include "multifile.h" | |
-#include "distributed_file.h" | |
-#include "half_block_info.h" | |
-#include "bwt_merge.h" | |
-#include "compute_gap.h" | |
-#include "em_compute_initial_ranks.h" | |
-#include "compute_right_gap.h" | |
-#include "compute_left_gap.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-//============================================================================= | |
-// The main function processing the block. | |
-//============================================================================= | |
-template<typename block_offset_type> | |
-void process_block(long block_beg, long block_end, long text_length, long ram_use, | |
- long max_threads, long gap_buf_size, std::string text_filename, | |
- std::string output_filename, std::string gap_filename, | |
- multifile *newtail_gt_begin_rev, const multifile *tail_gt_begin_rev, | |
- std::vector<half_block_info<block_offset_type> > &hblock_info, bool verbose) { | |
- long block_size = block_end - block_beg; | |
- | |
- if (block_end != text_length && block_size <= 1) { | |
- fprintf(stderr, "Error: any block other than the last one has to be of length at least two.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- long block_tail_beg = block_end; | |
- long block_tail_end = text_length; | |
- | |
- bool last_block = (block_end == text_length); | |
- bool first_block = (block_beg == 0); | |
- | |
- long left_block_size; | |
- if (!last_block) left_block_size = std::max(1L, block_size / 2L); | |
- else left_block_size = std::min(block_size, std::max(1L, ram_use / 10L)); | |
- long right_block_size = block_size - left_block_size; | |
- long left_block_beg = block_beg; | |
- long left_block_end = block_beg + left_block_size; | |
- long right_block_beg = left_block_end; | |
- long right_block_end = block_end; | |
- // Invariant; left_block_size > 0. | |
- | |
- fprintf(stderr, " Block size = %ld (%.2LfMiB)\n", block_size, 1.L * block_size / (1 << 20)); | |
- fprintf(stderr, " Left half-block size = %ld (%.2LfMiB)\n", left_block_size, 1.L * left_block_size / (1 << 20)); | |
- fprintf(stderr, " Right half-block size = %ld (%.2LfMiB)\n", right_block_size, 1.L * right_block_size / (1 << 20)); | |
- | |
- std::vector<long> block_initial_ranks; | |
- unsigned char block_last_symbol = 0; | |
- | |
- long right_block_i0 = 0; | |
- long left_block_i0 = 0; | |
- | |
- std::string right_block_pbwt_fname = output_filename + "." + utils::random_string_hash(); | |
- std::string right_block_gt_begin_rev_fname = output_filename + "." + utils::random_string_hash(); | |
- | |
- half_block_info<block_offset_type> info_left; | |
- half_block_info<block_offset_type> info_right; | |
- | |
- info_left.beg = left_block_beg; | |
- info_left.end = left_block_end; | |
- if (right_block_size > 0) { | |
- info_right.beg = right_block_beg; | |
- info_right.end = right_block_end; | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 1: Process right half-block. | |
- //---------------------------------------------------------------------------- | |
- multifile *right_block_gt_begin_rev = NULL; | |
- unsigned char *right_block = NULL; | |
- | |
- if (right_block_size > 0) { | |
- fprintf(stderr, " Process right half-block:\n"); | |
- | |
- // 1.a | |
- // | |
- // Read the right half-block from disk. | |
- fprintf(stderr, " Read: "); | |
- right_block = (unsigned char *)malloc(right_block_size); | |
- long double right_block_read_start = utils::wclock(); | |
- utils::read_block(text_filename, right_block_beg, right_block_size, right_block); | |
- block_last_symbol = right_block[right_block_size - 1]; | |
- long double right_block_read_time = utils::wclock() - right_block_read_start; | |
- long double right_block_read_io = (right_block_size / (1024.L * 1024)) / right_block_read_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_block_read_time, right_block_read_io); | |
- | |
- // 1.b | |
- // | |
- // Compute partial SA, BWT and gt_begin of the right half-block. | |
- | |
- // Allocate SA, BWT and gt_begin. | |
- unsigned char *right_block_sabwt = (unsigned char *)malloc(right_block_size * (sizeof(block_offset_type) + 1)); | |
- block_offset_type *right_block_psa_ptr = (block_offset_type *)right_block_sabwt; | |
- unsigned char *right_block_bwt = (unsigned char *)(right_block_psa_ptr + right_block_size); | |
- bitvector *right_block_gt_begin_rev_bv = new bitvector(right_block_size); | |
- | |
- // Start the timer. | |
- fprintf(stderr, " Internal memory sufsort: "); | |
- if (verbose) fprintf(stderr, "\n%s\n", std::string(60, '*').c_str()); | |
- long double right_block_sascan_start = utils::wclock(); | |
- | |
- // Close stderr. | |
- int stderr_backup = 0; | |
- if (!verbose) { | |
- std::fflush(stderr); | |
- stderr_backup = dup(2); | |
- int stderr_temp = open("/dev/null", O_WRONLY); | |
- dup2(stderr_temp, 2); | |
- close(stderr_temp); | |
- } | |
- | |
- // Run in-memory pSAscan. | |
- inmem_psascan_private::inmem_psascan<block_offset_type>(right_block, right_block_size, right_block_sabwt, | |
- max_threads, !last_block, true, right_block_gt_begin_rev_bv, -1, right_block_beg, right_block_end, | |
- text_length, text_filename, tail_gt_begin_rev, &right_block_i0); | |
- | |
- // Restore stderr. | |
- if (!verbose) { | |
- std::fflush(stderr); | |
- dup2(stderr_backup, 2); | |
- close(stderr_backup); | |
- } | |
- | |
- // Print summary. | |
- long double right_block_sascan_time = utils::wclock() - right_block_sascan_start; | |
- long double right_block_sascan_speed = (right_block_size / (1024.L * 1024)) / right_block_sascan_time; | |
- if (verbose) fprintf(stderr, "%s\n", std::string(60, '*').c_str()); | |
- fprintf(stderr, "%.2Lfs. Speed: %.2LfMiB/s\n", right_block_sascan_time, right_block_sascan_speed); | |
- | |
- // 1.c | |
- // | |
- // Compute the first term of initial ranks for the block. | |
- if (!last_block) { | |
- fprintf(stderr, " Compute initial tail ranks (part 1): "); | |
- long double initial_ranks_first_term_start = utils::wclock(); | |
- em_compute_initial_ranks<block_offset_type>(right_block, right_block_psa_ptr, right_block_bwt, | |
- right_block_i0, right_block_beg, right_block_end, text_length, text_filename, | |
- tail_gt_begin_rev, block_initial_ranks, max_threads, block_tail_end, 0); // Note the space usage! | |
- | |
- size_t vec_size = block_initial_ranks.size(); | |
- for (size_t j = 0; j + 1 < vec_size; ++j) | |
- block_initial_ranks[j] = block_initial_ranks[j + 1]; | |
- block_initial_ranks[vec_size - 1] = 0; | |
- | |
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - initial_ranks_first_term_start); | |
- } | |
- | |
- // 1.d | |
- // | |
- // Write the partial SA of the right half-block to disk. | |
- fprintf(stderr, " Write partial SA to disk: "); | |
- long double right_psa_save_start = utils::wclock(); | |
- long right_psa_max_part_length = std::max((long)sizeof(block_offset_type), ram_use / 20L); | |
- info_right.psa = new distributed_file<block_offset_type>(output_filename, | |
- right_psa_max_part_length, right_block_psa_ptr, right_block_psa_ptr + right_block_size); | |
- long double right_psa_save_time = utils::wclock() - right_psa_save_start; | |
- long double right_psa_save_io = ((right_block_size * sizeof(block_offset_type)) / (1024.L * 1024)) / right_psa_save_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_psa_save_time, right_psa_save_io); | |
- | |
- // 1.e | |
- // | |
- // Write the BWT of the right half-block on disk. | |
- if (!last_block) { | |
- fprintf(stderr, " Write BWT to disk: "); | |
- long double right_bwt_save_start = utils::wclock(); | |
- utils::write_objects_to_file(right_block_bwt, right_block_size, right_block_pbwt_fname); | |
- long double right_bwt_save_time = utils::wclock() - right_bwt_save_start; | |
- long double right_bwt_save_io = (right_block_size / (1024.L * 1024)) / right_bwt_save_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_bwt_save_time, right_bwt_save_io); | |
- } | |
- free(right_block_sabwt); | |
- | |
- // 1.f | |
- // | |
- // Write reversed gt_begin of the right half-block to disk. | |
- fprintf(stderr, " Write gt_begin to disk: "); | |
- long double right_gt_begin_rev_save_start = utils::wclock(); | |
- right_block_gt_begin_rev_bv->save(right_block_gt_begin_rev_fname); | |
- right_block_gt_begin_rev = new multifile(); | |
- right_block_gt_begin_rev->add_file(text_length - right_block_end, text_length - right_block_beg, | |
- right_block_gt_begin_rev_fname); | |
- delete right_block_gt_begin_rev_bv; | |
- long double right_gt_begin_rev_save_time = utils::wclock() - right_gt_begin_rev_save_start; | |
- long double right_gt_begin_rev_save_io = (right_block_size / (8.L * (1 << 20))) / right_gt_begin_rev_save_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_gt_begin_rev_save_time, right_gt_begin_rev_save_io); | |
- } | |
- | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 2: Process left half-block. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, " Process left half-block:\n"); | |
- | |
- // 2.a | |
- // | |
- // Read the left half-block from disk. | |
- fprintf(stderr, " Read: "); | |
- long double left_block_read_start = utils::wclock(); | |
- unsigned char *left_block = (unsigned char *)malloc(left_block_size); | |
- utils::read_block(text_filename, left_block_beg, left_block_size, left_block); | |
- unsigned char left_block_last = left_block[left_block_size - 1]; | |
- long double left_block_read_time = utils::wclock() - left_block_read_start; | |
- long double left_block_read_io = (left_block_size / (1024.L * 1024)) / left_block_read_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_block_read_time, left_block_read_io); | |
- | |
- // 2.b | |
- // | |
- // Compute partial SA, BWT and gt_begin for left half-block. | |
- | |
- // Allocate SA, BWT and gt_begin. | |
- unsigned char *left_block_sabwt = (unsigned char *)malloc(left_block_size * (sizeof(block_offset_type) + 1) + 1); | |
- block_offset_type *left_block_psa_ptr = (block_offset_type *)left_block_sabwt; | |
- unsigned char *left_block_bwt_ptr = (unsigned char *)(left_block_psa_ptr + left_block_size); | |
- bitvector *left_block_gt_begin_rev_bv = NULL; | |
- if (!first_block) left_block_gt_begin_rev_bv = new bitvector(left_block_size); | |
- | |
- // Start the timer. | |
- fprintf(stderr, " Internal memory sufsort: "); | |
- if (verbose) fprintf(stderr, "\n%s\n", std::string(60, '*').c_str()); | |
- long double left_block_sascan_start = utils::wclock(); | |
- | |
- // Close stderr. | |
- int stderr_backup = 0; | |
- if (!verbose) { | |
- std::fflush(stderr); | |
- stderr_backup = dup(2); | |
- int stderr_temp = open("/dev/null", O_WRONLY); | |
- dup2(stderr_temp, 2); | |
- close(stderr_temp); | |
- } | |
- | |
- // Run in-memory pSAscan. | |
- inmem_psascan_private::inmem_psascan<block_offset_type>(left_block, left_block_size, left_block_sabwt, | |
- max_threads, (right_block_size > 0), !first_block, left_block_gt_begin_rev_bv, -1, left_block_beg, | |
- left_block_end, text_length, text_filename, right_block_gt_begin_rev, &left_block_i0, right_block); | |
- | |
- // Restore stderr. | |
- if (!verbose) { | |
- std::fflush(stderr); | |
- dup2(stderr_backup, 2); | |
- close(stderr_backup); | |
- } | |
- | |
- // Print summary. | |
- long double left_block_sascan_time = utils::wclock() - left_block_sascan_start; | |
- long double left_block_sascan_speed = (left_block_size / (1024.L * 1024)) / left_block_sascan_time; | |
- if (verbose) fprintf(stderr, "%s\n", std::string(60, '*').c_str()); | |
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", left_block_sascan_time, left_block_sascan_speed); | |
- | |
- // 2.c | |
- // | |
- // Compute the second terms of block initial ranks. | |
- long after_block_initial_rank = 0; | |
- if (!last_block) { | |
- fprintf(stderr, " Compute initial tail ranks (part 2): "); | |
- long double initial_ranks_second_term_start = utils::wclock(); | |
- std::vector<long> block_initial_ranks_second_term; | |
- em_compute_initial_ranks<block_offset_type>(left_block, left_block_psa_ptr, left_block_beg, | |
- left_block_end, text_length, text_filename, tail_gt_begin_rev, block_initial_ranks_second_term, | |
- max_threads, block_tail_beg); // Note the space usage! | |
- | |
- after_block_initial_rank = block_initial_ranks_second_term[0]; | |
- size_t vec_size = block_initial_ranks_second_term.size(); | |
- for (size_t j = 0; j + 1 < vec_size; ++j) | |
- block_initial_ranks_second_term[j] = block_initial_ranks_second_term[j + 1]; | |
- block_initial_ranks_second_term[vec_size - 1] = 0; | |
- | |
- for (size_t j = 0; j < vec_size; ++j) | |
- block_initial_ranks[j] += block_initial_ranks_second_term[j]; | |
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - initial_ranks_second_term_start); | |
- } | |
- | |
- // 2.d | |
- // | |
- // Write the partial SA of the left half-block to disk. | |
- fprintf(stderr, " Write partial SA to disk: "); | |
- long double left_psa_save_start = utils::wclock(); | |
- long left_psa_max_part_length = std::max((long)sizeof(block_offset_type), ram_use / 20L); | |
- info_left.psa = new distributed_file<block_offset_type>(output_filename, | |
- left_psa_max_part_length, left_block_psa_ptr, left_block_psa_ptr + left_block_size); | |
- long double left_psa_save_time = utils::wclock() - left_psa_save_start; | |
- long double left_psa_save_io = ((left_block_size * sizeof(block_offset_type)) / (1024.L * 1024)) / left_psa_save_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_psa_save_time, left_psa_save_io); | |
- | |
- // 2.e | |
- // | |
- // Copy the BWT of the left half-block to separate array. | |
- unsigned char *left_block_bwt = NULL; | |
- if (right_block_size > 0) { | |
- fprintf(stderr, " Copy BWT of left half-block to separate array: "); | |
- long double left_bwt_copy_start = utils::wclock(); | |
- left_block_bwt = (unsigned char *)malloc(left_block_size); | |
- std::copy(left_block_bwt_ptr, left_block_bwt_ptr + left_block_size, left_block_bwt); | |
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - left_bwt_copy_start); | |
- } | |
- | |
- // 2.f | |
- // | |
- // Write gt_begin of the left half-block to disk. | |
- if (!first_block) { | |
- fprintf(stderr, " Write gt_begin to disk: "); | |
- long double left_gt_begin_rev_save_start = utils::wclock(); | |
- std::string left_block_gt_begin_rev_fname = output_filename + "." + utils::random_string_hash(); | |
- left_block_gt_begin_rev_bv->save(left_block_gt_begin_rev_fname); | |
- newtail_gt_begin_rev->add_file(text_length - left_block_end, text_length - left_block_beg, left_block_gt_begin_rev_fname); | |
- delete left_block_gt_begin_rev_bv; | |
- long double left_gt_begin_rev_save_time = utils::wclock() - left_gt_begin_rev_save_start; | |
- long double left_gt_begin_rev_save_io = (left_block_size / (8.L * (1 << 20))) / left_gt_begin_rev_save_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_gt_begin_rev_save_time, left_gt_begin_rev_save_io); | |
- } | |
- | |
- if (right_block_size == 0) { | |
- hblock_info.push_back(info_left); | |
- free(left_block); | |
- free(left_block_sabwt); | |
- return; | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 3: Compute the gap array of the left half-block wrt to the | |
- // right half-block. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, " Compute partial gap array for left half-block:\n"); | |
- buffered_gap_array *left_block_gap = NULL; | |
- | |
- // 3.a | |
- // | |
- // Compute initial ranks for streaming of the right half-block. | |
- fprintf(stderr, " Compute initial ranks: "); | |
- long double initial_ranks_right_half_block_start = utils::wclock(); | |
- std::vector<long> initial_ranks2; | |
- em_compute_initial_ranks<block_offset_type>(left_block, left_block_psa_ptr, left_block_bwt, | |
- left_block_i0, left_block_beg, left_block_end, text_length, text_filename, right_block_gt_begin_rev, | |
- initial_ranks2, max_threads, right_block_end, after_block_initial_rank); // Note the space usage! | |
- | |
- size_t vec_size = initial_ranks2.size(); | |
- for (size_t j = 0; j + 1 < vec_size; ++j) | |
- initial_ranks2[j] = initial_ranks2[j + 1]; | |
- initial_ranks2[vec_size - 1] = after_block_initial_rank; | |
- | |
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - initial_ranks_right_half_block_start); | |
- free(left_block); | |
- free(left_block_sabwt); | |
- | |
- // 3.b | |
- // | |
- // Build the rank over BWT of left half-block. | |
- fprintf(stderr, " Construct rank: "); | |
- long double left_block_rank_build_start = utils::wclock(); | |
- rank4n<> *left_block_rank = new rank4n<>(left_block_bwt, left_block_size, max_threads); | |
- long double left_block_rank_build_time = utils::wclock() - left_block_rank_build_start; | |
- long double left_block_rank_build_speed = (left_block_size / (1024.L * 1024)) / left_block_rank_build_time; | |
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", left_block_rank_build_time, left_block_rank_build_speed); | |
- | |
- // 3.c | |
- // | |
- // Compute gap array of the left half-block wrt to the right half-block. | |
- left_block_gap = new buffered_gap_array(left_block_size + 1, gap_filename); | |
- compute_gap<block_offset_type>(left_block_rank, left_block_gap, right_block_beg, right_block_end, | |
- text_length, max_threads, left_block_i0, gap_buf_size, left_block_last, | |
- initial_ranks2, text_filename, output_filename, right_block_gt_begin_rev, newtail_gt_begin_rev); | |
- delete left_block_rank; | |
- delete right_block_gt_begin_rev; | |
- | |
- if (last_block) { | |
- free(left_block_bwt); | |
- | |
- info_left.gap_filename = gap_filename + ".gap." + utils::random_string_hash(); | |
- left_block_gap->save_to_file(info_left.gap_filename); | |
- left_block_gap->erase_disk_excess(); | |
- delete left_block_gap; | |
- | |
- hblock_info.push_back(info_left); | |
- hblock_info.push_back(info_right); | |
- return; | |
- } | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 4: Compute the BWT for the block. | |
- //---------------------------------------------------------------------------- | |
- fprintf(stderr, " Compute block gap array:\n"); | |
- | |
- // 4.a | |
- // | |
- // Convert the partial gap of the left half-block into bitvector. | |
- fprintf(stderr, " Convert partial gap array of left half-block to bitvector: "); | |
- long double convert_to_bitvector_start = utils::wclock(); | |
- bitvector *left_block_gap_bv = left_block_gap->convert_to_bitvector(max_threads); | |
- long double convert_to_bitvector_time = utils::wclock() - convert_to_bitvector_start; | |
- long double convert_to_bitvector_speed = (block_size / (1024.L * 1024)) / convert_to_bitvector_time; | |
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", convert_to_bitvector_time, convert_to_bitvector_speed); | |
- | |
- left_block_gap->erase_disk_excess(); | |
- delete left_block_gap; | |
- | |
- // 4.b | |
- // | |
- // Read the BWT of the right half-block into RAM. | |
- fprintf(stderr, " Read BWT of right half-block: "); | |
- long double right_block_bwt_read_start = utils::wclock(); | |
- unsigned char *right_block_bwt = NULL; | |
- utils::read_objects_from_file(right_block_bwt, right_block_size, right_block_pbwt_fname); | |
- long double right_block_bwt_read_time = utils::wclock() - right_block_bwt_read_start; | |
- long double right_block_bwt_read_io = (right_block_size / (1024.L * 1024)) / right_block_bwt_read_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_block_bwt_read_time, right_block_bwt_read_io); | |
- | |
- utils::file_delete(right_block_pbwt_fname); | |
- | |
- unsigned char *block_pbwt = (unsigned char *)malloc(block_size); | |
- long block_i0 = 0; | |
- | |
- // 4.c | |
- // | |
- // Merge BWTs of left and right half-block. | |
- fprintf(stderr, " Merge BWTs of half-blocks: "); | |
- long double bwt_merge_start = utils::wclock(); | |
- block_i0 = merge_bwt(left_block_bwt, right_block_bwt, left_block_size, right_block_size, | |
- left_block_i0, right_block_i0, left_block_last, block_pbwt, left_block_gap_bv, max_threads); | |
- long double bwt_merge_time = utils::wclock() - bwt_merge_start; | |
- long double bwt_merge_speed = (block_size / (1024.L * 1024)) / bwt_merge_time; | |
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", bwt_merge_time, bwt_merge_speed); | |
- | |
- free(left_block_bwt); | |
- free(right_block_bwt); | |
- | |
- // 4.d | |
- // | |
- // Write left_block_gap_bv to disk. | |
- fprintf(stderr, " Write left half-block gap bitvector to disk: "); | |
- long double write_left_gap_bv_start = utils::wclock(); | |
- std::string left_block_gap_bv_filename = gap_filename + ".left_block_gap_bv"; | |
- left_block_gap_bv->save(left_block_gap_bv_filename); | |
- delete left_block_gap_bv; | |
- long double write_left_gap_bv_time = utils::wclock() - write_left_gap_bv_start; | |
- long double write_left_gap_bv_io = ((block_size / 8.L) / (1 << 20)) / write_left_gap_bv_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", write_left_gap_bv_time, write_left_gap_bv_io); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 5: Compute the gap array of the block. | |
- //---------------------------------------------------------------------------- | |
- | |
- // 5.a | |
- // | |
- // Construct the rank data structure over BWT of the block. | |
- fprintf(stderr, " Construct rank: "); | |
- long double whole_block_rank_build_start = utils::wclock(); | |
- rank4n<> *block_rank = new rank4n<>(block_pbwt, block_size, max_threads); | |
- free(block_pbwt); | |
- long double whole_block_rank_build_time = utils::wclock() - whole_block_rank_build_start; | |
- long double whole_block_rank_build_io = (block_size / (1024.L * 1024)) / whole_block_rank_build_time; | |
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", whole_block_rank_build_time, whole_block_rank_build_io); | |
- | |
- buffered_gap_array *block_gap = new buffered_gap_array(block_size + 1, gap_filename); | |
- | |
- // 5.b | |
- // | |
- // Compute gap for the block. During this step we also compute gt_begin | |
- // for the new tail. | |
- compute_gap<block_offset_type>(block_rank, block_gap, block_tail_beg, block_tail_end, text_length, | |
- max_threads, block_i0, gap_buf_size, block_last_symbol, block_initial_ranks, text_filename, | |
- output_filename, tail_gt_begin_rev, newtail_gt_begin_rev); | |
- delete block_rank; | |
- | |
- block_gap->flush_excess_to_disk(); | |
- | |
- // 5.c | |
- // | |
- // Read left_block_gap_bv from disk. | |
- fprintf(stderr, " Read left half-block gap bitvector from disk: "); | |
- long double left_block_gap_bv_read_start = utils::wclock(); | |
- left_block_gap_bv = new bitvector(left_block_gap_bv_filename); | |
- long double left_block_gap_bv_read_time = utils::wclock() - left_block_gap_bv_read_start; | |
- long double left_block_gap_bv_read_io = ((block_size / 8.L) / (1 << 20)) / left_block_gap_bv_read_time; | |
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_block_gap_bv_read_time, left_block_gap_bv_read_io); | |
- utils::file_delete(left_block_gap_bv_filename); | |
- | |
- //---------------------------------------------------------------------------- | |
- // STEP 6: Compute gap arrays of half-blocks. | |
- //---------------------------------------------------------------------------- | |
- info_left.gap_filename = gap_filename + ".gap." + utils::random_string_hash(); | |
- info_right.gap_filename = gap_filename + ".gap." + utils::random_string_hash(); | |
- | |
- gap_array_2n *block_gap_2n = new gap_array_2n(block_gap, max_threads); | |
- delete block_gap; | |
- block_gap_2n->apply_excess_from_disk(std::max((1L << 20), block_size), max_threads); | |
- | |
- long ram_budget = std::max(1L << 20, (long)(0.875L * block_size)); | |
- compute_right_gap(left_block_size, right_block_size, block_gap_2n, left_block_gap_bv, info_right.gap_filename, max_threads, ram_budget); | |
- compute_left_gap(left_block_size, right_block_size, block_gap_2n, left_block_gap_bv, info_left.gap_filename, max_threads, ram_budget); | |
- | |
- block_gap_2n->erase_disk_excess(); | |
- | |
- delete block_gap_2n; | |
- delete left_block_gap_bv; | |
- | |
- hblock_info.push_back(info_left); | |
- hblock_info.push_back(info_right); | |
-} | |
- | |
- | |
-//============================================================================= | |
-// Compute partial SAs and gap arrays and write to disk. | |
-// Return the array of handlers to distributed files as a result. | |
-//============================================================================= | |
-template<typename block_offset_type> | |
-std::vector<half_block_info<block_offset_type> > partial_sufsort(std::string text_filename, std::string output_filename, | |
- std::string gap_filename, long text_length, long max_block_size, long ram_use, long max_threads, long gap_buf_size, | |
- bool verbose) { | |
- fprintf(stderr, "sizeof(block_offset_type) = %lu\n\n", sizeof(block_offset_type)); | |
- | |
- long n_blocks = (text_length + max_block_size - 1) / max_block_size; | |
- multifile *tail_gt_begin_reversed = NULL; | |
- | |
- std::vector<half_block_info<block_offset_type> > hblock_info; | |
- for (long block_id = n_blocks - 1; block_id >= 0; --block_id) { | |
- long block_beg = max_block_size * block_id; | |
- long block_end = std::min(block_beg + max_block_size, text_length); | |
- fprintf(stderr, "Process block %ld/%ld [%ld..%ld):\n", n_blocks - block_id, n_blocks, block_beg, block_end); | |
- | |
- multifile *newtail_gt_begin_reversed = new multifile(); | |
- process_block<block_offset_type>(block_beg, block_end, text_length, ram_use, max_threads, gap_buf_size, | |
- text_filename, output_filename, gap_filename, newtail_gt_begin_reversed, tail_gt_begin_reversed, | |
- hblock_info, verbose); | |
- | |
- delete tail_gt_begin_reversed; | |
- tail_gt_begin_reversed = newtail_gt_begin_reversed; | |
- } | |
- | |
- delete tail_gt_begin_reversed; | |
- return hblock_info; | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_PARTIAL_SUFSORT_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/psascan.h b/exttools/pSAscan-0.1.0/src/psascan_src/psascan.h | |
deleted file mode 100644 | |
index 5fe9b87b..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/psascan.h | |
+++ /dev/null | |
@@ -1,145 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/psascan.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_PSASCAN_H_INCLUDED | |
-#define __PSASCAN_SRC_PSASCAN_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <string> | |
-#include <vector> | |
-#include <algorithm> | |
-#include <sys/resource.h> | |
- | |
-#include "partial_sufsort.h" | |
-#include "merge.h" | |
-#include "utils.h" | |
-#include "uint40.h" | |
-#include "half_block_info.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-void pSAscan(std::string input_filename, std::string output_filename, | |
- std::string gap_filename, long ram_use, long max_threads, | |
- bool verbose, long gap_buf_size = (1L << 21)) { | |
- long n_gap_buffers = 2 * max_threads; | |
- if (ram_use < 6L) { | |
- fprintf(stderr, "Error: not enough memory to run pSAscan.\n"); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Turn paths absolute. | |
- input_filename = utils::absolute_path(input_filename); | |
- output_filename = utils::absolute_path(output_filename); | |
- gap_filename = utils::absolute_path(gap_filename); | |
- long length = utils::file_size(input_filename); | |
- fprintf(stderr, "Input filename = %s\n", input_filename.c_str()); | |
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str()); | |
- fprintf(stderr, "Gap filename = %s\n", gap_filename.c_str()); | |
- fprintf(stderr, "Input length = %ld (%.1LfMiB)\n", length, 1.L * length / (1L << 20)); | |
- fprintf(stderr, "\n"); | |
- | |
- long ram_for_threads = n_gap_buffers * gap_buf_size; // for buffers | |
- if (ram_use / 5.2L < (long double)(1L << 31)) // for oracle | |
- ram_for_threads += max_threads * gap_buf_size; | |
- else ram_for_threads += ((4.L / 5) * max_threads) * gap_buf_size; | |
- ram_for_threads += max_threads * gap_buf_size; // for temp | |
- ram_for_threads += max_threads * (6L << 20); // for reader/writer buffers | |
- | |
- long ram_use_excluding_threads = ram_use - ram_for_threads; | |
- if (ram_use_excluding_threads < 6L) { | |
- long required_MiB = (ram_for_threads + (1L << 20) - 1) / (1L << 20); | |
- fprintf(stderr, "Error: not enough memory to start threads. You need " | |
- "at least %ldMiB\n", required_MiB + 1); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- fprintf(stderr, "RAM budget = %ld (%.1LfMiB)\n", ram_use, 1.L * ram_use / (1L << 20)); | |
- fprintf(stderr, "RAM budget (excluding threads) = %ld (%.1LfMiB)\n", | |
- ram_use_excluding_threads, 1.L * ram_use_excluding_threads / (1L << 20)); | |
- long max_block_size = std::max(2L, (long)(ram_use_excluding_threads / 5.2L)); | |
- | |
- fprintf(stderr, "Max block size = %ld (%.1LfMiB)\n\n", max_block_size, 1.L * max_block_size / (1L << 20)); | |
- fprintf(stderr, "Parallel settings:\n"); | |
- fprintf(stderr, " #streaming threads = %ld\n", max_threads); | |
- fprintf(stderr, " #gap buffers = %ld\n", n_gap_buffers); | |
- fprintf(stderr, " gap buffer size = %ld\n\n", gap_buf_size); | |
- | |
- // Check if the maximum number of open files | |
- // is large enough for the merging to work. | |
- long n_half_blocks_estimated = 2L * (length / max_block_size + 1); | |
- long merge_max_open_files_estimated = 2L * n_half_blocks_estimated; | |
- long stream_max_open_files_estimated = 3L * max_threads + 1; | |
- long max_open_files_estimated = std::max(merge_max_open_files_estimated, stream_max_open_files_estimated); | |
- rlimit rlimit_res; | |
- if (!getrlimit(RLIMIT_NOFILE, &rlimit_res) && | |
- (long)rlimit_res.rlim_cur < max_open_files_estimated) { | |
- fprintf(stderr, | |
-"\nError: the limit on the maximum number of open files is too small\n" | |
-"(current limit = %ld, required limit = %ld). See the README for\n" | |
-"more information.\n", | |
- (long)rlimit_res.rlim_cur, max_open_files_estimated); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- long double start = utils::wclock(); | |
- if (max_block_size < (1L << 31)) { | |
- std::vector<half_block_info<int> > hblock_info = partial_sufsort<int>(input_filename, | |
- output_filename, gap_filename, length, max_block_size, ram_use, max_threads, gap_buf_size, verbose); | |
- merge<int>(output_filename, ram_use, hblock_info); | |
- } else { | |
- std::vector<half_block_info<uint40> > hblock_info = partial_sufsort<uint40>(input_filename, | |
- output_filename, gap_filename, length, max_block_size, ram_use, max_threads, gap_buf_size, verbose); | |
- merge<uint40>(output_filename, ram_use, hblock_info); | |
- } | |
- long double total_time = utils::wclock() - start; | |
- | |
- fprintf(stderr, "\n\nComputation finished. Summary:\n"); | |
- fprintf(stderr, " elapsed time: %.2Lfs (%.4Lfs/MiB)\n", total_time, total_time / ((1.L * length) / (1L << 20))); | |
- fprintf(stderr, " speed: %.2LfMiB/s\n", ((1.L * length) / (1L << 20)) / total_time); | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
- | |
-// The main function. | |
-void pSAscan(std::string input_filename, std::string output_filename, | |
- std::string gap_filename, long ram_use, long max_threads, bool verbose) { | |
- psascan_private::pSAscan(input_filename, output_filename, | |
- gap_filename, ram_use, max_threads, verbose); | |
-} | |
- | |
-#endif // __PSASCAN_SRC_PSASCAN_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/rank.h b/exttools/pSAscan-0.1.0/src/psascan_src/rank.h | |
deleted file mode 100644 | |
index 777a6251..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/rank.h | |
+++ /dev/null | |
@@ -1,778 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/rank.h | |
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * A general rank data structure. Basic idea of the encoding is from | |
- * the rank data structure used in the external-memory algorithm for | |
- * constructing the Burrows-Wheeler transform called bwtdisk (available | |
- * at: http://people.unipmn.it/manzini/bwtdisk/) described in [1]. We | |
- * extended the data structure by applying the fixed block boosting [2] | |
- * and alphabet partitioning [3] techniques. The resulting data structure | |
- * was described in [4]. This file extends the implementation used in [4] | |
- * by parallelizing the construction and introducting an alternative | |
- * encoding (called type-I in the code). Type-I encoding is a novel | |
- * encoding due to present authors. | |
- * | |
- * References: | |
- * [1] Paolo Ferragina, Travis Gagie, Giovanni Manzini: | |
- * Lightweight Data Indexing and Compression in External Memory. | |
- * Algorithmica 63(3), p. 707-730 (2012). | |
- * [2] Juha Karkkainen, Simon J. Puglisi: | |
- * Fixed Block Compression Boosting in FM-Indexes. | |
- * In Proc. SPIRE 2011, p. 174-184. | |
- * [3] Jeremy Barbay, Travis Gagie, Gonzalo Navarro, Yakov Nekrich: | |
- * Alphabet Partitioning for Compressed Rank/Select and Applications. | |
- * In Proc. ISAAC 2010, p. 315-326. | |
- * [4] Juha Karkkainen, Dominik Kempa: | |
- * Engineering a Lightweight External Memory Suffix Array Construction | |
- * Algorithm. | |
- * In Proc. ICABD 2014, p. 53-60. | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_RANK_H_INCLUDED | |
-#define __PSASCAN_SRC_RANK_H_INCLUDED | |
- | |
-#include <algorithm> | |
-#include <vector> | |
-#include <thread> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template< | |
- unsigned k_sblock_size_log = 24, | |
- unsigned k_cblock_size_log = 20, | |
- unsigned k_sigma_log = 8> | |
-class rank4n { | |
- private: | |
- static const unsigned long k_cblock_size; | |
- static const unsigned long k_cblock_size_mask; | |
- static const unsigned long k_cblock_size_mask_neg; | |
- static const unsigned k_cblocks_in_sblock_log; | |
- static const unsigned k_cblocks_in_sblock; | |
- static const unsigned k_cblocks_in_sblock_mask; | |
- static const unsigned k_2cblock_size; | |
- static const unsigned k_2cblock_size_mask; | |
- static const unsigned k_sblock_size; | |
- static const unsigned k_sblock_size_mask; | |
- static const unsigned k_sigma; | |
- static const unsigned k_sigma_mask; | |
- | |
- static const unsigned k_char_type_freq = 0x01; | |
- static const unsigned k_char_type_rare = 0x02; | |
- static const unsigned k_char_type_missing = 0x03; | |
- | |
- unsigned long m_length; // length of original sequence | |
- unsigned long n_cblocks; // number of context blocks | |
- unsigned long n_sblocks; // number of super blocks | |
- | |
- unsigned long *m_sblock_header; | |
- unsigned long *m_cblock_header; | |
- unsigned long *m_cblock_header2; | |
- | |
- unsigned char *m_cblock_type; | |
- unsigned char *m_cblock_mapping; | |
- | |
- unsigned *m_freq_trunk; | |
- unsigned *m_rare_trunk; | |
- | |
- public: | |
- unsigned long *m_count; // symbol counts | |
- | |
- public: | |
- rank4n(const unsigned char *text, unsigned long length, unsigned max_threads) { | |
- m_length = length; | |
- n_cblocks = (m_length + k_cblock_size - 1) / k_cblock_size; | |
- n_sblocks = (n_cblocks + k_cblocks_in_sblock - 1) / k_cblocks_in_sblock; | |
- | |
- m_count = (unsigned long *)malloc(256L * sizeof(unsigned long)); | |
- std::fill(m_count, m_count + 256, 0UL); | |
- if (!m_length) return; | |
- | |
- m_sblock_header = (unsigned long *)malloc(n_sblocks * sizeof(unsigned long) * k_sigma); | |
- m_cblock_header = (unsigned long *)malloc(n_cblocks * sizeof(unsigned long)); | |
- m_cblock_header2 = (unsigned long *)malloc(n_cblocks * k_sigma * sizeof(unsigned long)); | |
- m_cblock_mapping = (unsigned char *)malloc(n_cblocks * k_sigma * 2); | |
- m_cblock_type = (unsigned char *)malloc((n_cblocks + 7) / 8); | |
- m_freq_trunk = (unsigned *)calloc(n_cblocks * k_cblock_size, sizeof(unsigned)); | |
- std::fill(m_cblock_type, m_cblock_type + (n_cblocks + 7) / 8, 0); | |
- | |
- encode_type_I(text, max_threads); | |
- encode_type_II(text, max_threads); | |
- | |
- m_count[0] -= n_cblocks * k_cblock_size - m_length; // remove extra zeros | |
- } | |
- | |
- void encode_type_I(const unsigned char *text, long max_threads) { | |
- //------------------------------------------------------------------------ | |
- // STEP 1: split all cblocks into equal size ranges (except possible the | |
- // last one). Each range is processed by one thread. During this | |
- // step we compute: (i) type of each cblock, (ii) encode all | |
- // type-I cblocks and for all type-II cblocks, we compute and | |
- // store: symbol mapping, symbol type (freq / rare / non-occurring) | |
- // and values of freq_cnt_log and rare_cnt_log. | |
- //------------------------------------------------------------------------ | |
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads; | |
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size; | |
- | |
- unsigned long *rare_trunk_size = new unsigned long[n_cblocks]; | |
- std::fill(rare_trunk_size, rare_trunk_size + n_cblocks, 0); | |
- | |
- bool *cblock_type = new bool[n_cblocks]; | |
- std::fill(cblock_type, cblock_type + n_cblocks, 0); | |
- | |
- unsigned **occ = (unsigned **)malloc(n_ranges * sizeof(unsigned *)); | |
- for (unsigned long i = 0; i < n_ranges; ++i) | |
- occ[i] = (unsigned *)malloc((k_cblock_size + 1) * sizeof(unsigned)); | |
- | |
- std::thread **threads = new std::thread*[n_ranges]; | |
- for (unsigned long i = 0; i < n_ranges; ++i) { | |
- unsigned long range_beg = i * range_size; | |
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks); | |
- | |
- threads[i] = new std::thread(encode_type_I_aux, std::ref(*this), | |
- text, range_beg, range_end, rare_trunk_size, cblock_type, occ[i]); | |
- } | |
- | |
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join(); | |
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i]; | |
- delete[] threads; | |
- | |
- for (unsigned long i = 0; i < n_ranges; ++i) | |
- free(occ[i]); | |
- free(occ); | |
- | |
- //------------------------------------------------------------------------ | |
- // STEP 2: compute global information based on local cblock computation: | |
- // * store cblock types, | |
- // * total size of rare trunk, | |
- // * pointers to the beginning of each rare trunk, | |
- // * cumulative counts of all symbols, | |
- // * non-inclusive partial sum over cblock range counts. | |
- //------------------------------------------------------------------------ | |
- unsigned long rare_trunk_total_size = 0; | |
- for (unsigned long cblock_id = 0; cblock_id < n_cblocks; ++cblock_id) { | |
- unsigned long cblock_beg = (cblock_id << k_cblock_size_log); | |
- | |
- // 1 | |
- // Store cblock type. | |
- if (cblock_type[cblock_id]) | |
- m_cblock_type[cblock_id >> 3] |= (1 << (cblock_id & 7)); | |
- | |
- // 2 | |
- // Compute the pointer to rare trunk and update total rare trunk size. | |
- unsigned long this_cblock_rare_trunk_size = rare_trunk_size[cblock_id]; | |
- m_cblock_header[cblock_id] |= (rare_trunk_total_size << 16); | |
- rare_trunk_total_size += this_cblock_rare_trunk_size; | |
- | |
- // 3 | |
- // Update cblock header. | |
- unsigned long cblock_header_beg = (cblock_id << k_sigma_log); | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- m_cblock_header2[cblock_header_beg + c] |= (m_count[c] << (k_cblock_size_log + 6)); | |
- | |
- // 4 | |
- // Update sblock header, | |
- if (!(cblock_beg & k_sblock_size_mask)) { | |
- unsigned long sblock_id = (cblock_beg >> k_sblock_size_log); | |
- unsigned long sblock_header_beg = (sblock_id << k_sigma_log); | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- m_sblock_header[sblock_header_beg + c] = m_count[c]; | |
- } | |
- | |
- // 5 | |
- // Update m_count. | |
- unsigned long ptr = (cblock_id << k_sigma_log); | |
- for (unsigned c = 0; c + 1 < k_sigma; ++c) | |
- m_count[c] += ((m_cblock_header2[ptr + c + 1] >> 5) & k_2cblock_size_mask) - | |
- ((m_cblock_header2[ptr + c] >> 5) & k_2cblock_size_mask); | |
- m_count[k_sigma - 1] += k_cblock_size - | |
- ((m_cblock_header2[ptr + k_sigma - 1] >> 5) & k_2cblock_size_mask); | |
- } | |
- m_rare_trunk = (unsigned *)calloc(rare_trunk_total_size, sizeof(unsigned)); | |
- | |
- delete[] cblock_type; | |
- delete[] rare_trunk_size; | |
- } | |
- | |
- static void encode_type_I_aux(rank4n &r, const unsigned char *text, | |
- unsigned long cblock_range_beg, unsigned long cblock_range_end, | |
- unsigned long *rare_trunk_size, bool *cblock_type, unsigned *occ) { | |
- std::vector<std::pair<uint32_t, unsigned char> > sorted_chars; | |
- std::vector<unsigned char> freq_chars; | |
- std::vector<unsigned char> rare_chars; | |
- | |
- unsigned *refpoint_precomputed = (unsigned *)malloc(k_cblock_size * sizeof(unsigned)); | |
- unsigned *cblock_count = new unsigned[k_sigma]; | |
- unsigned *list_beg = new unsigned[k_sigma]; | |
- unsigned *list_beg2 = new unsigned[k_sigma]; | |
- bool *isfreq = new bool[k_sigma]; | |
- unsigned *lookup_bits_precomputed = new unsigned[k_sigma]; | |
- unsigned *min_block_size_precomputed = new unsigned[k_sigma]; | |
- unsigned long *refpoint_mask_precomputed = new unsigned long[k_sigma]; | |
- | |
- // Process cblocks one by one. | |
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) { | |
- unsigned long cblock_beg = cblock_id << k_cblock_size_log; | |
- unsigned long cblock_end = cblock_beg + k_cblock_size; | |
- | |
- // Compute symbol counts inside cblock. | |
- std::fill(cblock_count, cblock_count + k_sigma, 0); | |
- unsigned long maxj = std::min(cblock_end, r.m_length); | |
- for (unsigned long j = cblock_beg; j < maxj; ++j) | |
- ++cblock_count[text[j]]; | |
- cblock_count[0] += cblock_end - maxj; | |
- | |
- // Compute starting positions of occurrences lists. | |
- for (unsigned j = 0, t, s = 0; j < k_sigma; ++j) { | |
- t = cblock_count[j]; | |
- list_beg[j] = s; | |
- list_beg2[j] = s; | |
- s += t; | |
- } | |
- | |
- // Store pointers to beginnings of occurrence lists in the type-I | |
- // cblock header. Note: this implicitly encodes cblock counts. | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- r.m_cblock_header2[(cblock_id << k_sigma_log) + c] = (list_beg[c] << 5); | |
- | |
- // Sort symbol counts by frequencies. | |
- sorted_chars.clear(); | |
- for (unsigned j = 0; j < k_sigma; ++j) | |
- if (cblock_count[j]) | |
- sorted_chars.push_back(std::make_pair(cblock_count[j], j)); | |
- std::sort(sorted_chars.begin(), sorted_chars.end()); | |
- | |
- // Separate (at most, due to rounding of freq_cnt) | |
- // about 3% of rarest symbols. | |
- unsigned rare_cnt = 0L, rare_sum = 0L; | |
- while (rare_cnt < sorted_chars.size() && | |
- 16L * (rare_sum + sorted_chars[rare_cnt].first) <= k_cblock_size) | |
- rare_sum += sorted_chars[rare_cnt++].first; | |
- | |
- // Compute freq_cnt. Then round up freq_cnt + 1 (+1 is | |
- // for rare char marker) to the smallest power of two. | |
- // Note: rare_cnt > 0, so after rounding freq_cnt <= 256. | |
- unsigned freq_cnt = sorted_chars.size() - rare_cnt; | |
- unsigned freq_cnt_log = utils::log2ceil(freq_cnt + 1); | |
- freq_cnt = (1 << freq_cnt_log); | |
- | |
- // Recompute rare_cnt (note the +1). | |
- rare_cnt = 0; | |
- if (sorted_chars.size() + 1 > freq_cnt) | |
- rare_cnt = sorted_chars.size() + 1 - freq_cnt; | |
- | |
- // Compute freq and rare chars. | |
- rare_chars.clear(); | |
- freq_chars.clear(); | |
- for (unsigned i = 0; i < rare_cnt; ++i) | |
- rare_chars.push_back(sorted_chars[i].second); | |
- for (unsigned i = rare_cnt; i < sorted_chars.size(); ++i) | |
- freq_chars.push_back(sorted_chars[i].second); | |
- | |
- // If there are rare symbols, round up | |
- // rare_cnt to the smallest power of two. | |
- unsigned rare_cnt_log = 0; | |
- if (rare_cnt) { | |
- rare_cnt_log = utils::log2ceil(rare_cnt); | |
- rare_cnt = (1 << rare_cnt_log); | |
- } | |
- | |
- // Update cblock type-I header. | |
- r.m_cblock_header[cblock_id] = freq_cnt_log; | |
- r.m_cblock_header[cblock_id] |= (rare_cnt_log << 8); | |
- | |
- // Compute and store symbols mapping. | |
- std::sort(freq_chars.begin(), freq_chars.end()); | |
- std::sort(rare_chars.begin(), rare_chars.end()); | |
- std::fill(isfreq, isfreq + 256, false); | |
- for (unsigned c = 0; c < 256; ++c) | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_missing; | |
- for (unsigned i = 0; i < freq_chars.size(); ++i) { | |
- unsigned char c = freq_chars[i]; | |
- isfreq[c] = true; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_freq; | |
- } | |
- for (unsigned i = 0; i < rare_chars.size(); ++i) { | |
- unsigned char c = rare_chars[i]; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i; | |
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_rare; | |
- } | |
- | |
- unsigned nofreq_cnt = 0L; | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- if (!isfreq[c]) nofreq_cnt += cblock_count[c]; | |
- | |
- | |
- if (freq_cnt >= 128) { // type-I cblock | |
- cblock_type[cblock_id] = true; | |
- | |
- // Compute lists of occurrences. | |
- for (unsigned long i = cblock_beg; i < maxj; ++i) | |
- occ[list_beg2[text[i]]++] = i - cblock_beg; | |
- for (unsigned long i = maxj; i < cblock_end; ++i) | |
- occ[list_beg2[0]++] = i - cblock_beg; | |
- | |
- // Precompute helper arrays and and store lookup bits into the header. | |
- for (unsigned c = 0; c < k_sigma; ++c) { | |
- lookup_bits_precomputed[c] = utils::log2ceil(cblock_count[c] + 2); | |
- r.m_cblock_header2[(cblock_id << 8) + c] |= lookup_bits_precomputed[c]; | |
- if (cblock_count[c]) | |
- min_block_size_precomputed[c] = k_cblock_size / cblock_count[c]; | |
- else min_block_size_precomputed[c] = 0; | |
- | |
- unsigned refpoint_dist_log = 31 - lookup_bits_precomputed[c]; | |
- unsigned long refpoint_dist = (1UL << refpoint_dist_log); | |
- unsigned long refpoint_dist_mask = refpoint_dist - 1; | |
- unsigned long refpoint_dist_mask_neg = (~refpoint_dist_mask); | |
- refpoint_mask_precomputed[c] = refpoint_dist_mask_neg; | |
- } | |
- | |
- // Actual encoding follows. | |
- unsigned *cblock_trunk = r.m_freq_trunk + cblock_beg; | |
- for (unsigned c = 0; c < k_sigma; ++c) { | |
- unsigned freq = cblock_count[c]; | |
- unsigned min_block_size = min_block_size_precomputed[c]; | |
- unsigned lookup_bits = lookup_bits_precomputed[c]; | |
- unsigned refpoint_dist_mask_neg = refpoint_mask_precomputed[c]; | |
- unsigned c_list_beg = list_beg[c]; | |
- | |
- for (unsigned j = 0; j < freq; ++j) | |
- cblock_trunk[c_list_beg + j] = freq + 1; | |
- if (freq) cblock_trunk[c_list_beg + freq - 1] = freq; | |
- | |
- unsigned block_beg = 0; | |
- for (unsigned j = 0; j < freq; ++j) { | |
- refpoint_precomputed[j] = (block_beg & refpoint_dist_mask_neg); | |
- block_beg += min_block_size; | |
- if ((((unsigned long)block_beg * freq) >> k_cblock_size_log) == j) ++block_beg; | |
- } | |
- | |
- unsigned refpoint, block_id; | |
- unsigned mask = (~((1UL << lookup_bits) - 1)); | |
- if (freq) { | |
- for (long j = freq - 1; j >= 0; --j) { | |
- block_id = (((unsigned long)occ[c_list_beg + j] * freq) >> k_cblock_size_log); | |
- refpoint = refpoint_precomputed[block_id]; | |
- cblock_trunk[c_list_beg + block_id] &= mask; | |
- cblock_trunk[c_list_beg + block_id] |= (unsigned)j; | |
- cblock_trunk[c_list_beg + j] |= ((occ[c_list_beg + j] - refpoint) << lookup_bits); | |
- } | |
- } | |
- } | |
- } else { | |
- // Update rare_trunk_size. | |
- if (rare_cnt) { | |
- long rare_blocks = 1 + (nofreq_cnt + rare_cnt - 1) / rare_cnt; | |
- rare_trunk_size[cblock_id] = rare_blocks * rare_cnt; | |
- } | |
- } | |
- } | |
- | |
- // Clean up. | |
- delete[] list_beg; | |
- delete[] list_beg2; | |
- delete[] isfreq; | |
- delete[] cblock_count; | |
- delete[] lookup_bits_precomputed; | |
- delete[] min_block_size_precomputed; | |
- delete[] refpoint_mask_precomputed; | |
- free(refpoint_precomputed); | |
- } | |
- | |
- void encode_type_II(const unsigned char *text, long max_threads) { | |
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads; | |
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size; | |
- | |
- std::thread **threads = new std::thread*[n_ranges]; | |
- for (unsigned long i = 0; i < n_ranges; ++i) { | |
- unsigned long range_beg = i * range_size; | |
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks); | |
- | |
- threads[i] = new std::thread(encode_type_II_aux, | |
- std::ref(*this), text, range_beg, range_end); | |
- } | |
- | |
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join(); | |
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i]; | |
- delete[] threads; | |
- } | |
- | |
- static void encode_type_II_aux(rank4n &r, const unsigned char *text, | |
- unsigned long cblock_range_beg, unsigned long cblock_range_end) { | |
- unsigned char *freq_map = new unsigned char[k_sigma]; | |
- unsigned char *rare_map = new unsigned char[k_sigma]; | |
- unsigned long *cur_count = new unsigned long[k_sigma]; | |
- unsigned long *off = new unsigned long[k_sigma]; | |
- | |
- long *sblock_h = new long[k_sigma]; | |
- int *israre = new int[k_sigma]; | |
- | |
- std::vector<unsigned char> freq_chars; | |
- std::vector<unsigned char> rare_chars; | |
- | |
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) { | |
- unsigned long cblock_beg = cblock_id << k_cblock_size_log; | |
- unsigned long cblock_end = cblock_beg + k_cblock_size; | |
- | |
- // Skip the cblock if it was type-I encoded. | |
- if (r.m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) continue; | |
- | |
- // Retreive symbol counts up to this cblock begin and | |
- // pointer to rare trunk size from cblock headers. | |
- for (unsigned c = 0; c < k_sigma; ++c) | |
- cur_count[c] = (r.m_cblock_header2[(cblock_id << 8) + c] >> (k_cblock_size_log + 6)); | |
- | |
- long r_filled = (r.m_cblock_header[cblock_id] >> 16); | |
- long r_ptr = r_filled; | |
- | |
- long freq_cnt_log = (r.m_cblock_header[cblock_id] & 255L); | |
- long rare_cnt_log = ((r.m_cblock_header[cblock_id] >> 8) & 255L); | |
- long freq_cnt = (1L << freq_cnt_log); | |
- long rare_cnt = (1L << rare_cnt_log); | |
- long rare_cnt_mask = rare_cnt - 1; | |
- | |
- freq_chars.clear(); | |
- rare_chars.clear(); | |
- std::fill(israre, israre + k_sigma, 1); | |
- for (unsigned c = 0; c < k_sigma; ++c) { | |
- unsigned char type = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)]; | |
- if (type == k_char_type_freq) { | |
- israre[c] = 0; | |
- freq_chars.push_back(c); | |
- freq_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1]; | |
- } else if (type == k_char_type_rare) { | |
- rare_chars.push_back(c); | |
- rare_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1]; | |
- freq_map[c] = freq_cnt - 1; | |
- } | |
- } | |
- | |
- if (rare_chars.empty()) { | |
- rare_cnt_log = 0; | |
- rare_cnt = 0; | |
- } | |
- | |
- long sblock_id = (cblock_beg >> k_sblock_size_log); | |
- std::copy(r.m_sblock_header + (sblock_id << 8), r.m_sblock_header + (sblock_id << 8) + k_sigma, sblock_h); | |
- for (long j = 0; j < k_sigma; ++j) off[j] = cur_count[j] - sblock_h[j]; | |
- | |
- long nofreq_cnt = 0; | |
- long freq_chars_size = (long)freq_chars.size(); | |
- long rare_chars_size = (long)rare_chars.size(); | |
- | |
- if (cblock_end <= r.m_length) { | |
- for (unsigned long i = cblock_beg; i < cblock_end; i += freq_cnt) { | |
- for (long j = 0; j < freq_chars_size; ++j) { | |
- unsigned char ch = freq_chars[j]; | |
- r.m_freq_trunk[i + j] = (off[ch] << 8); | |
- } | |
- r.m_freq_trunk[i + freq_cnt - 1] = (nofreq_cnt << 8); | |
- for (unsigned long j = i; j < i + freq_cnt; ++j) { | |
- unsigned char c = text[j]; | |
- r.m_freq_trunk[j] |= freq_map[c]; | |
- if (israre[c]) { | |
- if (!(nofreq_cnt & rare_cnt_mask)) { | |
- for (long jj = 0; jj < rare_chars_size; ++jj) { | |
- unsigned char ch = rare_chars[jj]; | |
- r.m_rare_trunk[r_filled++] = (off[ch] << 8); | |
- } | |
- r_filled += rare_cnt - rare_chars_size; | |
- } | |
- r.m_rare_trunk[r_ptr++] |= rare_map[c]; | |
- } | |
- ++off[c]; | |
- nofreq_cnt += israre[c]; | |
- } | |
- } | |
- for (long i = 0; i < k_sigma; ++i) | |
- cur_count[i] = sblock_h[i] + off[i]; | |
- } else { | |
- for (unsigned long i = cblock_beg; i < cblock_end; i += freq_cnt) { | |
- for (long j = 0; j < freq_chars_size; ++j) { | |
- unsigned char ch = freq_chars[j]; | |
- r.m_freq_trunk[i + j] = (off[ch] << 8); | |
- } | |
- r.m_freq_trunk[i + freq_cnt - 1] = (nofreq_cnt << 8); | |
- for (unsigned long j = i; j < i + freq_cnt; ++j) { | |
- unsigned char c = (j < r.m_length ? text[j] : 0); | |
- r.m_freq_trunk[j] |= freq_map[c]; | |
- if (israre[c]) { | |
- if (!(nofreq_cnt & rare_cnt_mask)) { | |
- for (long jj = 0; jj < rare_chars_size; ++jj) { | |
- unsigned char ch = rare_chars[jj]; | |
- r.m_rare_trunk[r_filled++] = (off[ch] << 8); | |
- } | |
- r_filled += rare_cnt - rare_chars_size; | |
- } | |
- r.m_rare_trunk[r_ptr++] |= rare_map[c]; | |
- } | |
- ++off[c]; | |
- nofreq_cnt += israre[c]; | |
- } | |
- } | |
- for (long i = 0; i < k_sigma; ++i) | |
- cur_count[i] = sblock_h[i] + off[i]; | |
- } | |
- | |
- for (long j = 0; j < rare_cnt; ++j) { | |
- unsigned char ch = (j < (long)rare_chars.size() ? rare_chars[j] : 0); | |
- long local_rank = cur_count[ch] - r.m_sblock_header[(sblock_id << 8) + ch]; | |
- r.m_rare_trunk[r_filled++] = (local_rank << 8); | |
- } | |
- } | |
- | |
- delete[] cur_count; | |
- delete[] sblock_h; | |
- delete[] freq_map; | |
- delete[] rare_map; | |
- delete[] israre; | |
- delete[] off; | |
- } | |
- | |
- inline long rank(long i, unsigned char c) const { | |
- if (i <= 0) return 0L; | |
- else if ((unsigned long)i >= m_length) return m_count[c]; | |
- | |
- unsigned long cblock_id = (i >> k_cblock_size_log); | |
- if (m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) { // type-I cblock | |
- long cblock_beg = (i & k_cblock_size_mask_neg); | |
- long cblock_i = (i & k_cblock_size_mask); // offset in cblock | |
- | |
- // Extract the rank up to the start of cblock. | |
- long rank_up_to_cblock = (m_cblock_header2[(cblock_id << k_sigma_log) + c] >> (k_cblock_size_log + 6)); | |
- | |
- // Now we compute the number of occurrences of c inside the cblock. | |
- // First, decode the beginning and end of c's occurrence list. | |
- long list_beg = ((m_cblock_header2[(cblock_id << k_sigma_log) + c] >> 5) & k_2cblock_size_mask); | |
- long list_end = ((c == k_sigma - 1) ? k_cblock_size : | |
- ((m_cblock_header2[(cblock_id << k_sigma_log) + c + 1] >> 5) & k_2cblock_size_mask)); | |
- if (list_beg == list_end) return rank_up_to_cblock; | |
- | |
- // Compute the distance from i to the closest reference point on the left. | |
- long lookup_bits = (m_cblock_header2[(cblock_id << k_sigma_log) + c] & 31); | |
- long refpoint_dist_log = 31 - lookup_bits; | |
- long refpoint_disk_mask = (1L << refpoint_dist_log) - 1; | |
- long i_refpoint_offset = (cblock_i & refpoint_disk_mask); | |
- | |
- // Compute threshold of symbol c inside the current cblock. | |
- long threshold = (1L << (k_cblock_size_log - lookup_bits + 1)); | |
- | |
- // Compute the id of block containing i. | |
- long list_size = list_end - list_beg; | |
- long approx = ((cblock_i * list_size) >> k_cblock_size_log); | |
- | |
- // Extract the lookup table entry. | |
- long lookup_mask = (1L << lookup_bits) - 1; | |
- long begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask); | |
- | |
- // Empty block optimization. | |
- if (begin == list_size + 1) { | |
- // Block containing cblock_i is empty, just find the beginning. | |
- ++approx; | |
- while ((m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask) == list_size + 1) ++approx; | |
- begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask); | |
- return rank_up_to_cblock + begin; | |
- } | |
- | |
- long next_block_begin = (approx + 1 == list_size) ? list_size : | |
- (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask); | |
- | |
- // Correct next_block_begin. | |
- if (approx + 1 != list_size && next_block_begin == list_size + 1) { | |
- ++approx; | |
- while ((m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask) == list_size + 1) ++approx; | |
- next_block_begin = (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask); | |
- } | |
- | |
- // Correct the value of begin and return the answer. | |
- if (i_refpoint_offset >= threshold) { | |
- // Case 1: easy case, will happen most of the time. | |
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset) | |
- ++begin; | |
- | |
- return rank_up_to_cblock + begin; | |
- } else { | |
- // Case 2: executed very rarely. | |
- if (begin == next_block_begin || (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < (2 * threshold)) { | |
- // Case 2a: the value in the occ list was small -> the ref | |
- // point for i and for the block are the same, we | |
- // proceed as before, without modifying i_refpoint_offset. | |
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset) | |
- ++begin; | |
- | |
- return rank_up_to_cblock + begin; | |
- } else { | |
- // Case 2b: block occurrences were encoded wrt to the | |
- // previous ref point -> we increase i_refpoint_offset | |
- // by refpoint_dist and proceed as before. | |
- i_refpoint_offset += (1L << refpoint_dist_log); | |
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset) | |
- ++begin; | |
- | |
- return rank_up_to_cblock + begin; | |
- } | |
- } | |
- } else { // type-II cblock | |
- long sblock_id = (i >> k_sblock_size_log); | |
- long sblock_rank = m_sblock_header[(sblock_id << 8) + c]; | |
- | |
- unsigned char type = m_cblock_mapping[2 * (c * n_cblocks + cblock_id)]; | |
- unsigned char c_map = m_cblock_mapping[2 * (c * n_cblocks + cblock_id) + 1]; | |
- | |
- long freq_cnt_bits = (m_cblock_header[cblock_id] & 255L); | |
- long rare_cnt_bits = ((m_cblock_header[cblock_id] >> 8) & 255L); | |
- long block_id = (i >> freq_cnt_bits); | |
- | |
- if (type == k_char_type_freq) { | |
- // Case 1 (fastest): symbol c was frequent in the context block. | |
- // Answer a query using frequent trunk. | |
- long block_rank = m_freq_trunk[(block_id << freq_cnt_bits) + c_map] >> 8; | |
- long extra = 0; | |
- for (long j = (block_id << freq_cnt_bits); j < i; ++j) | |
- if ((m_freq_trunk[j] & 255) == c_map) ++extra; | |
- | |
- return sblock_rank + block_rank + extra; | |
- } else if (type == k_char_type_rare) { | |
- // Case 2: symbol c was rare inside the context block. | |
- // Compute new_i. | |
- long rare_trunk_ptr = (m_cblock_header[cblock_id] >> 16); | |
- long new_i = m_freq_trunk[((block_id + 1) << freq_cnt_bits) - 1] >> 8; | |
- for (long j = (block_id << freq_cnt_bits); j < i; ++j) | |
- if ((m_freq_trunk[j] & 255) + 1 == (1U << freq_cnt_bits)) ++new_i; | |
- | |
- // Answer a query on rare trunk. | |
- long rare_block_id = (new_i >> rare_cnt_bits); | |
- long block_rank = m_rare_trunk[rare_trunk_ptr + | |
- (rare_block_id << rare_cnt_bits) + c_map] >> 8; | |
- long extra = 0; | |
- for (long j = (rare_block_id << rare_cnt_bits); j < new_i; ++j) | |
- if ((m_rare_trunk[rare_trunk_ptr + j] & 255) == c_map) ++extra; | |
- | |
- return sblock_rank + block_rank + extra; | |
- } else { | |
- // Case 3: symbol c does not occur in the context block. | |
- // Find the first cblock where c occurrs. | |
- while (cblock_id < n_cblocks && (cblock_id & k_cblocks_in_sblock_mask) && | |
- m_cblock_mapping[2 * (c * n_cblocks + cblock_id)] == k_char_type_missing) | |
- ++cblock_id; | |
- | |
- if (cblock_id == n_cblocks) { | |
- // We reached the end of encoding, return count[c]. | |
- return m_count[c]; | |
- } else if (!(cblock_id & k_cblocks_in_sblock_mask)) { | |
- // We reached the boundary of superblock, | |
- // retreive the answer from superblock header. | |
- return m_sblock_header[256 * (cblock_id >> k_cblocks_in_sblock_log) + c]; | |
- } else { | |
- // We found cblock where c occurrs, but it wasn't on the | |
- // sblock boundary. In the recursive call this will either | |
- // be case 1 or case 2. | |
- return rank(cblock_id << k_cblock_size_log, c); | |
- } | |
- } | |
- } | |
- } | |
- | |
- ~rank4n() { | |
- if (m_length) { | |
- free(m_sblock_header); | |
- free(m_cblock_header); | |
- free(m_cblock_header2); | |
- free(m_cblock_mapping); | |
- free(m_cblock_type); | |
- free(m_freq_trunk); | |
- free(m_rare_trunk); | |
- } | |
- free(m_count); | |
- } | |
-}; | |
- | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned long rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblock_size = (1L << k_cblock_size_log); | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned long rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblock_size_mask = (1L << k_cblock_size_log) - 1; | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_2cblock_size = (2 << k_cblock_size_log); | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_2cblock_size_mask = (2 << k_cblock_size_log) - 1; | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sigma = (1 << k_sigma_log); | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sigma_mask = (1 << k_sigma_log) - 1; | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned long rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblock_size_mask_neg = ~((1L << k_cblock_size_log) - 1); | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblocks_in_sblock_log = k_sblock_size_log - k_cblock_size_log; | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblocks_in_sblock = (1 << (k_sblock_size_log - k_cblock_size_log)); | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_cblocks_in_sblock_mask = (1 << (k_sblock_size_log - k_cblock_size_log)) - 1; | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sblock_size = (1 << k_sblock_size_log); | |
- | |
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log> | |
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log> | |
- ::k_sblock_size_mask = (1 << k_sblock_size_log) - 1; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_RANK_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/ranksel_support.h b/exttools/pSAscan-0.1.0/src/psascan_src/ranksel_support.h | |
deleted file mode 100644 | |
index debe9877..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/ranksel_support.h | |
+++ /dev/null | |
@@ -1,193 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/ranksel_support.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_RANKSEL_SUPPORT_H_INCLUDED | |
-#define __PSASCAN_SRC_RANKSEL_SUPPORT_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <algorithm> | |
- | |
-#include "bitvector.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-struct ranksel_support { | |
- //============================================================================ | |
- // Compute sparse_rank[group_beg..group_end). | |
- //============================================================================ | |
- static void process_group_of_chunks(long group_beg, long group_end, | |
- long chunk_size, long *sparse_rank, const bitvector *bv) { | |
- for (long chunk_id = group_beg; chunk_id < group_end; ++chunk_id) { | |
- long chunk_beg = chunk_id * chunk_size; | |
- long chunk_end = chunk_beg + chunk_size; | |
- | |
- sparse_rank[chunk_id] = bv->range_sum(chunk_beg, chunk_end); | |
- } | |
- } | |
- | |
- | |
- //============================================================================ | |
- // Constructor. | |
- //============================================================================ | |
- ranksel_support(const bitvector *bv, long length, long max_threads) { | |
- m_bv = bv; | |
- m_length = length; | |
- | |
- // 1 | |
- // | |
- // Compute chunk size and allocate m_sparse_rank. | |
- m_chunk_size = std::min((1L << 20), (m_length + max_threads - 1) / max_threads); | |
- n_chunks = m_length / m_chunk_size; // we exclude the last partial chunk | |
- m_sparse_rank = (long *)malloc((n_chunks + 1) * sizeof(long)); | |
- | |
- // 2 | |
- // | |
- // Compute the sum of 1-bits inside each chunk and write to m_sparse_rank. | |
- // Since there can be more chunks than threads, we split chunks | |
- // into groups and let each thread handle the group of chunks. | |
- long chunk_max_group_size = (n_chunks + max_threads - 1) / max_threads; | |
- long n_chunk_groups = (n_chunks + chunk_max_group_size - 1) / chunk_max_group_size; | |
- | |
- std::thread **threads = new std::thread*[n_chunk_groups]; | |
- for (long t = 0; t < n_chunk_groups; ++t) { | |
- long chunk_group_beg = t * chunk_max_group_size; | |
- long chunk_group_end = std::min(chunk_group_beg + chunk_max_group_size, n_chunks); | |
- threads[t] = new std::thread(process_group_of_chunks, chunk_group_beg, | |
- chunk_group_end, m_chunk_size, m_sparse_rank, m_bv); | |
- } | |
- | |
- for (long t = 0; t < n_chunk_groups; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_chunk_groups; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- // 3 | |
- // | |
- // Compute partial (exclusive) sum on m_sparse_rank. | |
- long ones = 0L; | |
- for (long i = 0; i < n_chunks; ++i) { | |
- long temp = m_sparse_rank[i]; | |
- m_sparse_rank[i] = ones; | |
- ones += temp; | |
- } | |
- m_sparse_rank[n_chunks] = ones; | |
- } | |
- | |
- | |
- //============================================================================ | |
- // Find the largest position j such that the number of 0s in bv[0..j) is <= i. | |
- // In other words, find the position of i-th 0-bit in bv (i = 0, 1, ..). | |
- // 0 <= i < number of 0-bits in bv. | |
- //============================================================================ | |
- inline long select0(long i) const { | |
- // Fast-forward through chunks preceding the chunk with the answer. | |
- long j = 0L; | |
- while (j < n_chunks && ((j + 1) * m_chunk_size) - m_sparse_rank[j + 1] <= i) | |
- ++j; | |
- | |
- long zero_cnt_j = (j * m_chunk_size) - m_sparse_rank[j]; | |
- j *= m_chunk_size; | |
- | |
- // Find the final position in a single chunk. | |
- while (zero_cnt_j + (1 - m_bv->get(j)) <= i) | |
- zero_cnt_j += (1 - m_bv->get(j++)); | |
- | |
- return j; | |
- } | |
- | |
- | |
- //============================================================================ | |
- // Find the largest position j such that the number of 1s in bv[0..j) is <= i. | |
- // In other words, find the position of i-th 1-bit in bv (i = 0, 1, ..). | |
- // 0 <= i < number of 1-bits in bv. | |
- //============================================================================ | |
- inline long select1(long i) const { | |
- // Fast-forward through chunks preceding the chunk with the answer. | |
- long j = 0L; | |
- while (j < n_chunks && m_sparse_rank[j + 1] <= i) | |
- ++j; | |
- | |
- long rank_j = m_sparse_rank[j]; | |
- j *= m_chunk_size; | |
- | |
- // Find the final position in a single chunk. | |
- while (rank_j + m_bv->get(j) <= i) | |
- rank_j += m_bv->get(j++); | |
- | |
- return j; | |
- } | |
- | |
- //============================================================================ | |
- // Compute the number of 1-bits in bv[0..i) with the help of sparse_rank. | |
- // Note: | |
- // - i is an integer in the range from 0 to length of bv (inclusive), | |
- // - sparse_rank[k] = number of 1-bits in bv[0..k * chunk_size), | |
- //============================================================================ | |
- inline long rank(long i) const { | |
- long j = i / m_chunk_size; | |
- long result = m_sparse_rank[j]; | |
- j *= m_chunk_size; | |
- | |
- while (j < i) | |
- result += m_bv->get(j++); | |
- | |
- return result; | |
- } | |
- | |
- | |
- //============================================================================ | |
- // Compute the number of 0-bits in bv[0..i). | |
- // 0 <= i <= m_length. | |
- //============================================================================ | |
- inline long rank0(long i) const { | |
- return i - rank(i); | |
- } | |
- | |
- | |
- ~ranksel_support() { | |
- free(m_sparse_rank); | |
- } | |
- | |
- long m_length; // length of bitvector | |
- long m_chunk_size; // chunk size | |
- long n_chunks; // number of chunks | |
- long *m_sparse_rank; | |
- | |
- const bitvector *m_bv; | |
-}; | |
- | |
-} // psascan_private | |
- | |
-#endif // __PSASCAN_SRC_RANKSEL_SUPPORT_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/sparse_isa.h b/exttools/pSAscan-0.1.0/src/psascan_src/sparse_isa.h | |
deleted file mode 100644 | |
index 3d872e1e..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/sparse_isa.h | |
+++ /dev/null | |
@@ -1,169 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/sparse_isa.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section DESCRIPTION | |
- * | |
- * Sparse ISA encoding based on the ISAs algorithm computing | |
- * Lempel-Ziv (LZ77) factorization described in | |
- * | |
- * Dominik Kempa, Simon J. Puglisi: | |
- * Lempel-Ziv factorization: Simple, fast, practical. | |
- * In Proc. ALENEX 2013, p. 103-112. | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_SPARSE_ISA_H_INCLUDED | |
-#define __PSASCAN_SRC_SPARSE_ISA_H_INCLUDED | |
- | |
-#include <algorithm> | |
-#include <thread> | |
- | |
- | |
-namespace psascan_private { | |
- | |
-template<typename approx_rank_type, typename saidx_t, long k_sampling_rate_log> | |
-struct sparse_isa { | |
- private: | |
- long m_length; | |
- long m_last_isa; | |
- long m_i0; | |
- | |
- long *m_count; | |
- long *m_sparse_isa; | |
- | |
- const saidx_t *m_psa; | |
- const unsigned char *m_text; | |
- const approx_rank_type *m_rank; | |
- | |
- static const long k_sampling_rate; | |
- static const long k_sampling_rate_mask; | |
- static const long k_sigma = 256; | |
- | |
- private: | |
- template<typename T> | |
- static void compute_sparse_isa_aux(const T *psa, long block_beg, | |
- long block_end, long psa_size, long *sparse_isa, long &last) { | |
- for (long j = block_beg; j < block_end; ++j) { | |
- long sa_j = (long)psa[j]; | |
- if (!(sa_j & k_sampling_rate_mask)) | |
- sparse_isa[sa_j >> k_sampling_rate_log] = j; | |
- if (sa_j == psa_size - 1) last = j; | |
- } | |
- } | |
- | |
- public: | |
- sparse_isa(const saidx_t *psa, const unsigned char *text, long length, | |
- long i0, const approx_rank_type *rank, long max_threads) { | |
- m_psa = psa; | |
- m_length = length; | |
- m_rank = rank; | |
- m_text = text; | |
- m_i0 = i0; | |
- | |
- long elems = (m_length + k_sampling_rate - 1) / k_sampling_rate + 1; | |
- m_sparse_isa = (long *)malloc(elems * sizeof(long)); | |
- | |
- long max_block_size = (m_length + max_threads - 1) / max_threads; | |
- long n_blocks = (m_length + max_block_size - 1) / max_block_size; | |
- | |
- std::thread **threads = new std::thread*[n_blocks]; | |
- for (long t = 0; t < n_blocks; ++t) { | |
- long block_beg = t * max_block_size; | |
- long block_end = std::min(block_beg + max_block_size, m_length); | |
- | |
- threads[t] = new std::thread(compute_sparse_isa_aux<saidx_t>, m_psa, | |
- block_beg, block_end, m_length, m_sparse_isa, std::ref(m_last_isa)); | |
- } | |
- | |
- for (long t = 0; t < n_blocks; ++t) threads[t]->join(); | |
- for (long t = 0; t < n_blocks; ++t) delete threads[t]; | |
- delete[] threads; | |
- | |
- m_count = (long *)malloc(k_sigma * sizeof(long)); | |
- std::copy(rank->m_count, rank->m_count + k_sigma, m_count); | |
- ++m_count[text[length - 1]]; | |
- --m_count[0]; | |
- | |
- for (long i = 0, s = 0; i < k_sigma; ++i) { | |
- long t = m_count[i]; | |
- m_count[i] = s; | |
- s += t; | |
- } | |
- } | |
- | |
- inline long query(long j) const { | |
- long isa_i; | |
- long i = ((j + k_sampling_rate - 1) >> k_sampling_rate_log); | |
- if ((i << k_sampling_rate_log) < m_length) { | |
- isa_i = (long)m_sparse_isa[i]; | |
- i <<= k_sampling_rate_log; | |
- } else { | |
- isa_i = m_last_isa; | |
- i = m_length - 1; | |
- } | |
- | |
- while (i != j) { | |
- // Compute ISA[i - 1] from ISA[i]. | |
- // Invariant: | |
- // isa_i = ISA[i] | |
- // j <= i | |
- unsigned char c = m_text[i - 1]; | |
- int delta = (isa_i > m_i0 && c == 0); | |
- | |
- isa_i = m_count[c] + m_rank->rank(isa_i, c) - delta; | |
- while (isa_i < 0 || (long)m_psa[isa_i] != i - 1) | |
- ++isa_i; | |
- | |
- --i; | |
- } | |
- | |
- return isa_i; | |
- } | |
- | |
- ~sparse_isa() { | |
- free(m_sparse_isa); | |
- free(m_count); | |
- } | |
-}; | |
- | |
-template<typename approx_rank_type, typename saidx_t, long k_sampling_rate_log> | |
-const long sparse_isa<approx_rank_type, saidx_t, k_sampling_rate_log> | |
- ::k_sampling_rate = (1L << k_sampling_rate_log); | |
- | |
-template<typename approx_rank_type, typename saidx_t, long k_sampling_rate_log> | |
-const long sparse_isa<approx_rank_type, saidx_t, k_sampling_rate_log> | |
- ::k_sampling_rate_mask = (1L << k_sampling_rate_log) - 1; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_SPARSE_ISA_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/stream.h b/exttools/pSAscan-0.1.0/src/psascan_src/stream.h | |
deleted file mode 100644 | |
index fcbd1823..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/stream.h | |
+++ /dev/null | |
@@ -1,265 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/stream.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_STREAM_H_INCLUDED | |
-#define __PSASCAN_SRC_STREAM_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstring> | |
-#include <string> | |
-#include <mutex> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
-#include "rank.h" | |
-#include "gap_buffer.h" | |
-#include "update.h" | |
-#include "stream_info.h" | |
-#include "multifile.h" | |
-#include "multifile_bit_stream_reader.h" | |
-#include "async_multifile_bit_stream_reader.h" | |
-#include "async_backward_skip_stream_reader.h" | |
-#include "async_bit_stream_writer.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-std::mutex stdout_mutex; | |
- | |
-template<typename block_offset_type> | |
-void parallel_stream( | |
- gap_buffer_poll<block_offset_type> *full_gap_buffers, | |
- gap_buffer_poll<block_offset_type> *empty_gap_buffers, | |
- long stream_block_beg, | |
- long stream_block_end, | |
- block_offset_type i, | |
- const long *count, | |
- block_offset_type whole_suffix_rank, | |
- const rank4n<> *rank, | |
- unsigned char last, | |
- std::string text_filename, | |
- long length, | |
- std::string &tail_gt_filename, | |
- stream_info *info, | |
- int thread_id, | |
- long gap_range_size, | |
- long gap_buf_size, | |
- const multifile *tail_gt_begin, | |
- long n_increasers) { | |
- | |
- static const int max_buckets = 4096; | |
- int *block_id_to_sblock_id = new int[max_buckets]; | |
- | |
- long bucket_size = 1; | |
- long bucket_size_bits = 0; | |
- while ((gap_range_size + bucket_size - 1) / bucket_size > max_buckets) | |
- bucket_size <<= 1, ++bucket_size_bits; | |
- long n_buckets = (gap_range_size + bucket_size - 1) / bucket_size; | |
- int *block_count = new int[n_buckets]; | |
- | |
- long max_buffer_elems = gap_buf_size / sizeof(block_offset_type); | |
- block_offset_type *temp = new block_offset_type[max_buffer_elems]; | |
- int *oracle = new int[max_buffer_elems]; | |
- | |
- static const long buffer_sample_size = 512; | |
- std::vector<block_offset_type> samples(buffer_sample_size); | |
- long *ptr = new long[n_increasers]; | |
- block_offset_type *bucket_lbound = new block_offset_type[n_increasers + 1]; | |
- | |
- typedef async_multifile_bit_stream_reader bit_stream_reader_type; | |
- typedef async_backward_skip_stream_reader<unsigned char> text_reader_type; | |
- typedef async_bit_stream_writer bit_stream_writer_type; | |
- | |
- text_reader_type *text_streamer = new text_reader_type(text_filename, length - stream_block_end, 4L << 20); | |
- bit_stream_writer_type *gt_out = new bit_stream_writer_type(tail_gt_filename, 1L << 20); | |
- bit_stream_reader_type gt_in(tail_gt_begin, length - stream_block_end, 1L << 20); | |
- | |
- long j = stream_block_end, dbg = 0L; | |
- while (j > stream_block_beg) { | |
- if (dbg > (1 << 26)) { | |
- info->m_mutex.lock(); | |
- info->m_streamed[thread_id] = stream_block_end - j; | |
- info->m_update_count += 1; | |
- if (info->m_update_count == info->m_thread_count) { | |
- info->m_update_count = 0L; | |
- long double elapsed = utils::wclock() - info->m_timestamp; | |
- long total_streamed = 0L; | |
- | |
- for (long t = 0; t < info->m_thread_count; ++t) | |
- total_streamed += info->m_streamed[t]; | |
- long double speed = (total_streamed / (1024.L * 1024)) / elapsed; | |
- | |
- stdout_mutex.lock(); | |
- fprintf(stderr, "\r Stream: %.2Lf%%. Time: %.2Lf. Speed: %.2LfMiB/s", | |
- (total_streamed * 100.L) / info->m_tostream, elapsed, speed); | |
- stdout_mutex.unlock(); | |
- } | |
- info->m_mutex.unlock(); | |
- dbg = 0L; | |
- } | |
- | |
- // Get a gap buffer from the poll of empty buffers. | |
- std::unique_lock<std::mutex> lk(empty_gap_buffers->m_mutex); | |
- while (!empty_gap_buffers->available()) | |
- empty_gap_buffers->m_cv.wait(lk); | |
- | |
- gap_buffer<block_offset_type> *b = empty_gap_buffers->get(); | |
- lk.unlock(); | |
- empty_gap_buffers->m_cv.notify_one(); // let others know they should re-check | |
- | |
- // Process buffer -- fill with gap values. | |
- long left = j - stream_block_beg; | |
- b->m_filled = std::min(left, b->m_size); | |
- dbg += b->m_filled; | |
- std::fill(block_count, block_count + n_buckets, 0); | |
- | |
- for (long t = 0L; t < b->m_filled; ++t, --j) { | |
- unsigned char c = text_streamer->read(); | |
- | |
- gt_out->write(i > whole_suffix_rank); | |
- bool next_gt = (gt_in.read()); | |
- | |
- int delta = (i > whole_suffix_rank && c == 0); | |
- i = (block_offset_type)(count[c] + rank->rank((long)i, c) - delta); | |
- if (c == last && next_gt) ++i; | |
- temp[t] = i; | |
- block_count[i >> bucket_size_bits]++; | |
- } | |
- | |
- // Compute super-buckets. | |
- long ideal_sblock_size = (b->m_filled + n_increasers - 1) / n_increasers; | |
- long max_sbucket_size = 0; | |
- long bucket_id_beg = 0; | |
- for (long t = 0; t < n_increasers; ++t) { | |
- long bucket_id_end = bucket_id_beg, size = 0L; | |
- while (bucket_id_end < n_buckets && size < ideal_sblock_size) | |
- size += block_count[bucket_id_end++]; | |
- b->sblock_size[t] = size; | |
- max_sbucket_size = std::min(max_sbucket_size, size); | |
- for (long id = bucket_id_beg; id < bucket_id_end; ++id) | |
- block_id_to_sblock_id[id] = t; | |
- bucket_id_beg = bucket_id_end; | |
- } | |
- | |
- if (max_sbucket_size < 4L * ideal_sblock_size) { | |
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++]) | |
- b->sblock_beg[t] = ptr[t] = curbeg; | |
- | |
- // Permute the elements of the buffer. | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long id = (temp[t] >> bucket_size_bits); | |
- long sblock_id = block_id_to_sblock_id[id]; | |
- oracle[t] = ptr[sblock_id]++; | |
- } | |
- | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long addr = oracle[t]; | |
- b->m_content[addr] = temp[t]; | |
- } | |
- } else { | |
- // Repeat the partition into sbuckets, this time using random sample. | |
- // This is a fallback mechanism in case the quick partition failed. | |
- // It is not suppose to happen to often. | |
- | |
- // Compute random sample of elements in the buffer. | |
- for (long t = 0; t < buffer_sample_size; ++t) | |
- samples[t] = temp[utils::random_long(0L, b->m_filled - 1)]; | |
- std::sort(samples.begin(), samples.end()); | |
- samples.erase(std::unique(samples.begin(), samples.end()), samples.end()); | |
- | |
- // Compute bucket boundaries (lower bound is enough). | |
- std::fill(bucket_lbound, bucket_lbound + n_increasers + 1, gap_range_size); | |
- | |
- long step = (samples.size() + n_increasers - 1) / n_increasers; | |
- for (size_t t = 1, p = step; p < samples.size(); ++t, p += step) | |
- bucket_lbound[t] = (samples[p - 1] + samples[p] + 1) / 2; | |
- bucket_lbound[0] = 0; | |
- | |
- // Compute bucket sizes and sblock id into oracle array. | |
- std::fill(b->sblock_size, b->sblock_size + n_increasers, 0L); | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- block_offset_type x = temp[t]; | |
- int id = n_increasers; | |
- while (bucket_lbound[id] > x) --id; | |
- oracle[t] = id; | |
- b->sblock_size[id]++; | |
- } | |
- | |
- // Permute elements into their own buckets using oracle. | |
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++]) | |
- b->sblock_beg[t] = ptr[t] = curbeg; | |
- | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long sblock_id = oracle[t]; | |
- oracle[t] = ptr[sblock_id]++; | |
- } | |
- | |
- for (long t = 0; t < b->m_filled; ++t) { | |
- long addr = oracle[t]; | |
- b->m_content[addr] = temp[t]; | |
- } | |
- } | |
- | |
- // Add the buffer to the poll of full buffers and notify waiting thread. | |
- std::unique_lock<std::mutex> lk2(full_gap_buffers->m_mutex); | |
- full_gap_buffers->add(b); | |
- lk2.unlock(); | |
- full_gap_buffers->m_cv.notify_one(); | |
- } | |
- | |
- delete text_streamer; | |
- delete gt_out; | |
- | |
- // Report that another worker thread has finished. | |
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex); | |
- full_gap_buffers->increment_finished_workers(); | |
- lk.unlock(); | |
- | |
- // Notify waiting update threads in case no more buffers | |
- // are going to be produces by worker threads. | |
- full_gap_buffers->m_cv.notify_one(); | |
- | |
- delete[] block_count; | |
- delete[] block_id_to_sblock_id; | |
- delete[] temp; | |
- delete[] oracle; | |
- delete[] ptr; | |
- delete[] bucket_lbound; | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_STREAM_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/stream_info.h b/exttools/pSAscan-0.1.0/src/psascan_src/stream_info.h | |
deleted file mode 100644 | |
index 49624b6b..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/stream_info.h | |
+++ /dev/null | |
@@ -1,85 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/stream_info.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_STREAM_INFO_H_INCLUDED | |
-#define __PSASCAN_SRC_STREAM_INFO_H_INCLUDED | |
- | |
-#include <mutex> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-//============================================================================= | |
-// Used to store progress information for different threads during streaming. | |
-//============================================================================= | |
-struct stream_info { | |
- stream_info(long thread_count, long tostream) | |
- : m_update_count(0L), | |
- m_thread_count(thread_count), | |
- m_tostream(tostream) { | |
- m_streamed = new long[thread_count]; | |
- std::fill(m_streamed, m_streamed + thread_count, 0L); | |
- | |
- m_idle_update = new long double[thread_count]; | |
- m_idle_work = new long double[thread_count]; | |
- std::fill(m_idle_update, m_idle_update + thread_count, 0.L); | |
- std::fill(m_idle_work, m_idle_work + thread_count, 0.L); | |
- | |
- m_timestamp = utils::wclock(); | |
- } | |
- | |
- ~stream_info() { | |
- delete[] m_streamed; | |
- delete[] m_idle_work; | |
- delete[] m_idle_update; | |
- } | |
- | |
- long m_update_count; // number of updates | |
- long m_thread_count; // number of threads | |
- long m_tostream; // total text length to stream | |
- long double m_timestamp; // when the streaming started | |
- long *m_streamed; // how many bytes streamed by each thread | |
- long double *m_idle_update; | |
- long double *m_idle_work; | |
- | |
- std::mutex m_mutex; | |
-}; | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_STREAM_INFO_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/uint40.h b/exttools/pSAscan-0.1.0/src/psascan_src/uint40.h | |
deleted file mode 100644 | |
index 938aca4a..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/uint40.h | |
+++ /dev/null | |
@@ -1,181 +0,0 @@ | |
-/****************************************************************************** | |
- * | |
- * Class representing a 40-bit unsigned integer encoded in five bytes. | |
- * | |
- ****************************************************************************** | |
- * Copyright (C) 2012 Timo Bingmann <tb@panthema.net> | |
- * | |
- * This program is free software: you can redistribute it and/or modify it | |
- * under the terms of the GNU General Public License as published by the Free | |
- * Software Foundation, either version 3 of the License, or (at your option) | |
- * any later version. | |
- * | |
- * This program is distributed in the hope that it will be useful, but WITHOUT | |
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
- * more details. | |
- * | |
- * You should have received a copy of the GNU General Public License along with | |
- * this program. If not, see <http://www.gnu.org/licenses/>. | |
- ***************************************************************************** | |
- * | |
- * NOTE: This is slightly modified version of the file used in eSAIS-0.5.4 | |
- * (https://panthema.net/2012/1119-eSAIS-Inducing-Suffix-and-LCP-Arrays- | |
- * in-External-Memory/). In particular, it contains a small bugfix in the | |
- * += operator. | |
- * | |
- * Modified by Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- *****************************************************************************/ | |
- | |
- | |
-#ifndef __UINT40_H_INCLUDED | |
-#define __UINT40_H_INCLUDED | |
- | |
-#include <inttypes.h> | |
-#include <stdint.h> | |
-#include <cassert> | |
-#include <iostream> | |
-#include <limits> | |
-#include <unistd.h> | |
- | |
- | |
-class uint40 | |
-{ | |
-private: | |
- uint32_t low; | |
- uint8_t high; | |
- | |
-public: | |
- inline uint40() | |
- { | |
- } | |
- | |
- inline uint40(uint32_t l, uint8_t h) | |
- : low(l), high(h) | |
- { | |
- } | |
- | |
- inline uint40(const uint40& a) | |
- : low(a.low), high(a.high) | |
- { | |
- } | |
- | |
- inline uint40(const int& a) | |
- : low(a), high(0) | |
- { | |
- } | |
- | |
- inline uint40(const unsigned int& a) | |
- : low(a), high(0) | |
- { | |
- } | |
- | |
- inline uint40(const uint64_t& a) | |
- : low(a & 0xFFFFFFFF), high((a >> 32) & 0xFF) | |
- { | |
- assert( a <= 0xFFFFFFFFFFLU ); | |
- } | |
- | |
- inline uint40(const long& a) | |
- : low(a & 0xFFFFFFFFL), high((a >> 32) & 0xFF) { | |
- assert( a <= 0xFFFFFFFFFFL ); | |
- } | |
- | |
- inline uint64_t ull() const { | |
- return ((uint64_t)high) << 32 | (uint64_t)low; | |
- } | |
- | |
- inline long ll() const | |
- { | |
- return (long)ull(); | |
- } | |
- | |
- inline operator uint64_t() const | |
- { | |
- return ull(); | |
- } | |
- | |
- inline uint64_t u64() const | |
- { | |
- return ((uint64_t)high) << 32 | (uint64_t)low; | |
- } | |
- | |
- inline uint40& operator++ () | |
- { | |
- if (low == std::numeric_limits<uint32_t>::max()) | |
- ++high, low = 0; | |
- else | |
- ++low; | |
- return *this; | |
- } | |
- | |
- inline uint40& operator-- () | |
- { | |
- if (low == 0) | |
- --high, low = std::numeric_limits<uint32_t>::max(); | |
- else | |
- --low; | |
- return *this; | |
- } | |
- | |
- inline uint40& operator+= (const uint40& b) | |
- { | |
- uint64_t add = (uint64_t)low + b.low; // BUGFIX | |
- low = add & 0xFFFFFFFF; | |
- high += b.high + ((add >> 32) & 0xFF); | |
- return *this; | |
- } | |
- | |
- inline bool operator== (const uint40& b) const | |
- { | |
- return (low == b.low) && (high == b.high); | |
- } | |
- | |
- inline bool operator!= (const uint40& b) const | |
- { | |
- return (low != b.low) || (high != b.high); | |
- } | |
- | |
- inline bool operator< (const uint40& b) const | |
- { | |
- return (high < b.high) || (high == b.high && low < b.low); | |
- } | |
- | |
- inline bool operator<= (const uint40& b) const | |
- { | |
- return (high < b.high) || (high == b.high && low <= b.low); | |
- } | |
- | |
- inline bool operator> (const uint40& b) const | |
- { | |
- return (high > b.high) || (high == b.high && low > b.low); | |
- } | |
- | |
- inline bool operator>= (const uint40& b) const | |
- { | |
- return (high > b.high) || (high == b.high && low >= b.low); | |
- } | |
- | |
- friend std::ostream& operator<< (std::ostream& os, const uint40& a) | |
- { | |
- return os << a.ull(); | |
- } | |
- | |
-} __attribute__((packed)); | |
- | |
-namespace std { | |
- | |
-template<> | |
-class numeric_limits<uint40> { | |
-public: | |
- static uint40 min() { return uint40(std::numeric_limits<uint32_t>::min(), | |
- std::numeric_limits<uint8_t>::min()); } | |
- | |
- static uint40 max() { return uint40(std::numeric_limits<uint32_t>::max(), | |
- std::numeric_limits<uint8_t>::max()); } | |
-}; | |
- | |
-} | |
- | |
-#endif // __UINT40_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/update.h b/exttools/pSAscan-0.1.0/src/psascan_src/update.h | |
deleted file mode 100644 | |
index 4e757943..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/update.h | |
+++ /dev/null | |
@@ -1,226 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/update.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_UPDATE_H_INCLUDED | |
-#define __PSASCAN_SRC_UPDATE_H_INCLUDED | |
- | |
-#include <thread> | |
-#include <mutex> | |
-#include <condition_variable> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
-#include "gap_buffer.h" | |
-#include "gap_array.h" | |
-#include "stream_info.h" | |
- | |
- | |
-namespace psascan_private { | |
- | |
-//============================================================================== | |
-// This object creates a given number of threads that will perform gap array | |
-// updates. Most of the time all threads are sleeping on a conditional variable. | |
-// Once the gap buffer is available for processing, they are all woken up and | |
-// perform the update in parallel. The caller then waits until all threads are | |
-// finished and then puts the gap buffer in the poll of empty buffers. | |
-// | |
-// Only one object of this class should exist. | |
-//============================================================================== | |
-template<typename block_offset_type> | |
-struct gap_parallel_updater { | |
- | |
- template<typename T> | |
- static void parallel_update(gap_parallel_updater<T> *updater, int id) { | |
- while (true) { | |
- // Wait until there is a gap buffer available or the | |
- // message 'no more buffers' arrives. | |
- std::unique_lock<std::mutex> lk(updater->m_avail_mutex); | |
- while (!(updater->m_avail[id]) && !(updater->m_avail_no_more)) | |
- updater->m_avail_cv.wait(lk); | |
- | |
- if (!(updater->m_avail[id]) && updater->m_avail_no_more) { | |
- // No more buffers -- exit. | |
- lk.unlock(); | |
- return; | |
- } | |
- | |
- updater->m_avail[id] = false; | |
- lk.unlock(); | |
- | |
- // Safely perform the update. | |
- gap_buffer<T> *buf = updater->m_buffer; | |
- buffered_gap_array *gap = updater->m_gap_array; | |
- int beg = buf->sblock_beg[id]; | |
- int end = beg + buf->sblock_size[id]; | |
- | |
- for (int i = beg; i < end; ++i) { | |
- T x = buf->m_content[i]; | |
- gap->m_count[x]++; | |
- | |
- // Check if values wrapped-around. | |
- if (gap->m_count[x] == 0L) { | |
- gap->m_excess_mutex.lock(); | |
- gap->add_excess(x); | |
- gap->m_excess_mutex.unlock(); | |
- } | |
- } | |
- | |
- // Update the number of finished threads. | |
- bool finished_last = false; | |
- std::unique_lock<std::mutex> lk2(updater->m_finished_mutex); | |
- updater->m_finished++; | |
- if (updater->m_finished == updater->m_threads_cnt) | |
- finished_last = true; | |
- lk2.unlock(); | |
- | |
- // If this was the last thread finishing, let the caller know. | |
- if (finished_last) | |
- updater->m_finished_cv.notify_one(); | |
- } | |
- } | |
- | |
- gap_parallel_updater(buffered_gap_array *gap_array, int threads_cnt) | |
- : m_gap_array(gap_array), | |
- m_threads_cnt(threads_cnt), | |
- m_avail_no_more(false) { | |
- m_avail = new bool[m_threads_cnt]; | |
- std::fill(m_avail, m_avail + m_threads_cnt, false); | |
- m_threads = new std::thread*[m_threads_cnt]; | |
- | |
- // After this, threads immediately hang up on m_avail_cv. | |
- for (int i = 0; i < m_threads_cnt; ++i) | |
- m_threads[i] = new std::thread(parallel_update<block_offset_type>, this, i); | |
- } | |
- | |
- ~gap_parallel_updater() { | |
- // Signal all threads to finish. | |
- std::unique_lock<std::mutex> lk(m_avail_mutex); | |
- m_avail_no_more = true; | |
- lk.unlock(); | |
- m_avail_cv.notify_all(); | |
- | |
- // Wait until all threads finish and release memory. | |
- for (int i = 0; i < m_threads_cnt; ++i) { | |
- m_threads[i]->join(); | |
- delete m_threads[i]; | |
- } | |
- delete[] m_threads; | |
- delete[] m_avail; | |
- } | |
- | |
- void update(gap_buffer<block_offset_type> *buffer) { | |
- // Prepare a message for each thread that new buffer is available. | |
- std::unique_lock<std::mutex> lk(m_avail_mutex); | |
- m_finished = 0; | |
- m_buffer = buffer; | |
- for (int i = 0; i < m_threads_cnt; ++i) | |
- m_avail[i] = true; | |
- lk.unlock(); | |
- | |
- // Wake up all threads to perform the update. | |
- m_avail_cv.notify_all(); | |
- | |
- // Wait until all threads report that they are done. | |
- std::unique_lock<std::mutex> lk2(m_finished_mutex); | |
- while (m_finished != m_threads_cnt) | |
- m_finished_cv.wait(lk2); | |
- lk2.unlock(); | |
- | |
- // We are done processing the buffer. The caller of this method | |
- // can now place the buffer into the poll of empty buffers. | |
- } | |
- | |
-private: | |
- buffered_gap_array *m_gap_array; | |
- | |
- std::thread **m_threads; | |
- int m_threads_cnt; | |
- | |
- gap_buffer<block_offset_type> *m_buffer; | |
- | |
- // For notifying threads about available buffer. | |
- std::mutex m_avail_mutex; | |
- std::condition_variable m_avail_cv; | |
- bool *m_avail; | |
- bool m_avail_no_more; | |
- | |
- // The mutex below is to protect m_finished. The condition | |
- // variable allows the caller to wait (and to be notified when done) | |
- // until threads complete processing their section of the buffer. | |
- int m_finished; | |
- std::mutex m_finished_mutex; | |
- std::condition_variable m_finished_cv; | |
-}; | |
- | |
-template<typename block_offset_type> | |
-void gap_updater(gap_buffer_poll<block_offset_type> *full_gap_buffers, | |
- gap_buffer_poll<block_offset_type> *empty_gap_buffers, | |
- buffered_gap_array *gap, long n_increasers) { | |
- | |
- gap_parallel_updater<block_offset_type> *updater = | |
- new gap_parallel_updater<block_offset_type>(gap, n_increasers); | |
- | |
- while (true) { | |
- // Get a buffer from the poll of full buffers. | |
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex); | |
- while (!full_gap_buffers->available() && !full_gap_buffers->finished()) | |
- full_gap_buffers->m_cv.wait(lk); | |
- | |
- if (!full_gap_buffers->available() && full_gap_buffers->finished()) { | |
- // There will be no more full buffers -- exit. | |
- lk.unlock(); | |
- break; | |
- } | |
- | |
- gap_buffer<block_offset_type> *b = full_gap_buffers->get(); | |
- lk.unlock(); | |
- | |
- // Process buffer. | |
- updater->update(b); | |
- | |
- // Add the buffer to the poll of empty buffers and notify | |
- // the waiting thread. | |
- std::unique_lock<std::mutex> lk2(empty_gap_buffers->m_mutex); | |
- empty_gap_buffers->add(b); | |
- lk2.unlock(); | |
- empty_gap_buffers->m_cv.notify_one(); | |
- } | |
- | |
- delete updater; | |
-} | |
- | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_UPDATE_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/utils.cpp b/exttools/pSAscan-0.1.0/src/psascan_src/utils.cpp | |
deleted file mode 100644 | |
index a6eb7f08..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/utils.cpp | |
+++ /dev/null | |
@@ -1,169 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/utils.cpp | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <cstring> | |
-#include <errno.h> | |
-#include <stdint.h> | |
-#include <unistd.h> | |
-#include <sys/time.h> | |
-#include <string> | |
-#include <fstream> | |
-#include <algorithm> | |
- | |
-#include "utils.h" | |
- | |
- | |
-namespace psascan_private { | |
-namespace utils { | |
- | |
-long double wclock() { | |
- timeval tim; | |
- gettimeofday(&tim, NULL); | |
- | |
- return tim.tv_sec + (tim.tv_usec / 1000000.0L); | |
-} | |
- | |
-std::FILE *open_file(std::string fname, std::string mode) { | |
- std::FILE *f = std::fopen(fname.c_str(), mode.c_str()); | |
- if (f == NULL) { | |
- std::perror(fname.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- return f; | |
-} | |
- | |
-long file_size(std::string fname) { | |
- std::FILE *f = open_file(fname, "rt"); | |
- std::fseek(f, 0L, SEEK_END); | |
- long size = std::ftell(f); | |
- std::fclose(f); | |
- | |
- return size; | |
-} | |
- | |
-bool file_exists(std::string fname) { | |
- std::FILE *f = std::fopen(fname.c_str(), "r"); | |
- bool ret = (f != NULL); | |
- if (f != NULL) | |
- std::fclose(f); | |
- | |
- return ret; | |
-} | |
- | |
-void file_delete(std::string fname) { | |
- int res = std::remove(fname.c_str()); | |
- if (res) { | |
- fprintf(stderr, "Failed to delete %s: %s\n", | |
- fname.c_str(), strerror(errno)); | |
- std::exit(EXIT_FAILURE); | |
- } | |
-} | |
- | |
-std::string absolute_path(std::string fname) { | |
- char path[1 << 12]; | |
- bool created = false; | |
- | |
- if (!file_exists(fname)) { | |
- // We need to create the file, since realpath fails on non-existing files. | |
- std::fclose(open_file(fname, "w")); | |
- created = true; | |
- } | |
- if (!realpath(fname.c_str(), path)) { | |
- fprintf(stderr, "\nError: realpath failed for %s\n", fname.c_str()); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- if (created) | |
- file_delete(fname); | |
- | |
- return std::string(path); | |
-} | |
- | |
-void read_block(std::FILE *f, long beg, long length, unsigned char *b) { | |
- std::fseek(f, beg, SEEK_SET); | |
- read_n_objects_from_file<unsigned char>(b, length, f); | |
-} | |
- | |
-void read_block(std::string fname, long beg, long length, unsigned char *b) { | |
- std::FILE *f = open_file(fname.c_str(), "r"); | |
- read_block(f, beg, length, b); | |
- std::fclose(f); | |
-} | |
- | |
-int random_int(int p, int r) { | |
- return p + rand() % (r - p + 1); | |
-} | |
- | |
-long random_long(long p, long r) { | |
- long x = random_int(0, 1000000000); | |
- long y = random_int(0, 1000000000); | |
- long z = x * 1000000000L + y; | |
- return p + z % (r - p + 1); | |
-} | |
- | |
-void fill_random_string(unsigned char* &s, long length, int sigma) { | |
- for (long i = 0; i < length; ++i) | |
- s[i] = random_int(0, sigma - 1); | |
-} | |
- | |
-void fill_random_letters(unsigned char* &s, long n, int sigma) { | |
- fill_random_string(s, n, sigma); | |
- for (long i = 0; i < n; ++i) s[i] += 'a'; | |
-} | |
- | |
-std::string random_string_hash() { | |
- uint64_t hash = (uint64_t)rand() * RAND_MAX + rand(); | |
- std::stringstream ss; | |
- ss << hash; | |
- return ss.str(); | |
-} | |
- | |
-long log2ceil(long x) { | |
- long pow2 = 1, w = 0; | |
- while (pow2 < x) { pow2 <<= 1; ++w; } | |
- return w; | |
-} | |
- | |
-long log2floor(long x) { | |
- long pow2 = 1, w = 0; | |
- while ((pow2 << 1) <= x) { pow2 <<= 1; ++w; } | |
- return w; | |
-} | |
- | |
-} // namespace utils | |
-} // namespace psascan_private | |
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/utils.h b/exttools/pSAscan-0.1.0/src/psascan_src/utils.h | |
deleted file mode 100644 | |
index e5f3522f..00000000 | |
--- a/exttools/pSAscan-0.1.0/src/psascan_src/utils.h | |
+++ /dev/null | |
@@ -1,145 +0,0 @@ | |
-/** | |
- * @file src/psascan_src/utils.h | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#ifndef __PSASCAN_SRC_PSASCAN_UTILS_H_INCLUDED | |
-#define __PSASCAN_SRC_PSASCAN_UTILS_H_INCLUDED | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <string> | |
-#include <sstream> | |
- | |
- | |
-namespace psascan_private { | |
-namespace utils { | |
- | |
-#define STRX(x) #x | |
-#define STR(x) STRX(x) | |
- | |
-// Time | |
-long double wclock(); | |
- | |
-// Basic file handling | |
-std::FILE *open_file(std::string fname, std::string mode); | |
-long file_size(std::string fname); | |
-bool file_exists(std::string fname); | |
-void file_delete(std::string fname); | |
-std::string absolute_path(std::string fname); | |
- | |
-// File I/O | |
-void read_block(std::string fname, long beg, long length, unsigned char *b); | |
-void read_block(std::FILE *f, long beg, long length, unsigned char *b); | |
- | |
-template<typename value_type> | |
-void write_objects_to_file(const value_type *tab, long length, std::string fname) { | |
- std::FILE *f = open_file(fname, "w"); | |
- size_t fwrite_ret = std::fwrite(tab, sizeof(value_type), length, f); | |
- if ((long)fwrite_ret != length) { | |
- fprintf(stderr, "\nError: fwrite in line %s of %s returned %ld\n", | |
- STR(__LINE__), STR(__FILE__), fwrite_ret); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- std::fclose(f); | |
-} | |
- | |
-template<typename value_type> | |
-void add_objects_to_file(const value_type *tab, long length, std::FILE *f) { | |
- size_t fwrite_ret = std::fwrite(tab, sizeof(value_type), length, f); | |
- if ((long)fwrite_ret != length) { | |
- fprintf(stderr, "\nError: fwrite in line %s of %s returned %lu\n", | |
- STR(__LINE__), STR(__FILE__), fwrite_ret); | |
- std::exit(EXIT_FAILURE); | |
- } | |
-} | |
- | |
-template<typename value_type> | |
-void add_objects_to_file(const value_type *tab, long length, std::string fname) { | |
- std::FILE *f = utils::open_file(fname.c_str(), "a"); | |
- add_objects_to_file<value_type>(tab, length, f); | |
- std::fclose(f); | |
-} | |
- | |
-template<typename value_type> | |
-void read_n_objects_from_file(value_type* tab, long length, std::FILE *f) { | |
- size_t fread_ret = std::fread(tab, sizeof(value_type), length, f); | |
- if ((long)fread_ret != length) { | |
- fprintf(stderr, "\nError: fread in line %s of %s returned %ld\n", | |
- STR(__LINE__), STR(__FILE__), fread_ret); | |
- std::exit(EXIT_FAILURE); | |
- } | |
-} | |
- | |
-template<typename value_type> | |
-void read_n_objects_from_file(value_type* tab, long length, std::string fname) { | |
- std::FILE *f = open_file(fname, "r"); | |
- read_n_objects_from_file<value_type>(tab, length, f); | |
- std::fclose(f); | |
-} | |
- | |
-template<typename value_type> | |
-void read_objects_from_file(value_type* &tab, long &length, std::string fname) { | |
- std::FILE *f = open_file(fname, "r"); | |
- std::fseek(f, 0L, SEEK_END); | |
- length = (long)(std::ftell(f) / sizeof(value_type)); | |
- std::rewind(f); | |
- tab = (value_type *)malloc(length * sizeof(value_type)); | |
- read_n_objects_from_file<value_type>(tab, length, f); | |
- std::fclose(f); | |
-} | |
- | |
-// Randomness | |
-int random_int(int p, int r); | |
-long random_long(long p, long r); | |
-void fill_random_string(unsigned char* &s, long length, int sigma); | |
-void fill_random_letters(unsigned char* &s, long n, int sigma); | |
-std::string random_string_hash(); | |
- | |
-// Math | |
-long log2ceil(long x); | |
-long log2floor(long x); | |
- | |
-// Misc | |
-template<typename int_type> | |
-std::string intToStr(int_type x) { | |
- std::stringstream ss; | |
- ss << x; | |
- return ss.str(); | |
-} | |
- | |
-} // namespace utils | |
-} // namespace psascan_private | |
- | |
-#endif // __PSASCAN_SRC_PSASCAN_UTILS_H_INCLUDED | |
diff --git a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/Makefile b/exttools/pSAscan-0.1.0/tools/delete-bytes-255/Makefile | |
deleted file mode 100644 | |
index f6c7269c..00000000 | |
--- a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/Makefile | |
+++ /dev/null | |
@@ -1,11 +0,0 @@ | |
-SHELL = /bin/sh | |
-CC = g++ | |
-CFLAGS = -Wall -Wextra -pedantic -Wshadow -funroll-loops -DNDEBUG -O3 -march=native | |
- | |
-all: delete255 | |
- | |
-delete255: | |
- $(CC) $(CFLAGS) -o delete255 main.cpp | |
- | |
-clean: | |
- /bin/rm -f delete255 *.o | |
diff --git a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/main.cpp b/exttools/pSAscan-0.1.0/tools/delete-bytes-255/main.cpp | |
deleted file mode 100644 | |
index 3d6366d8..00000000 | |
--- a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/main.cpp | |
+++ /dev/null | |
@@ -1,106 +0,0 @@ | |
-/** | |
- * @file tools/delete-bytes-255/main.cpp | |
- * @author Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * @section LICENCE | |
- * | |
- * This file is part of pSAscan v0.1.0 | |
- * See: http://www.cs.helsinki.fi/group/pads/ | |
- * | |
- * Copyright (C) 2014-2015 | |
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi> | |
- * Dominik Kempa <dominik.kempa (at) gmail.com> | |
- * | |
- * Permission is hereby granted, free of charge, to any person | |
- * obtaining a copy of this software and associated documentation | |
- * files (the "Software"), to deal in the Software without | |
- * restriction, including without limitation the rights to use, | |
- * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
- * copies of the Software, and to permit persons to whom the | |
- * Software is furnished to do so, subject to the following | |
- * conditions: | |
- * | |
- * The above copyright notice and this permission notice shall be | |
- * included in all copies or substantial portions of the Software. | |
- * | |
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
- * OTHER DEALINGS IN THE SOFTWARE. | |
- **/ | |
- | |
-#include <cstdio> | |
-#include <cstdlib> | |
-#include <sys/time.h> | |
- | |
- | |
-long double wallclock() { | |
- timeval tim; | |
- gettimeofday(&tim, NULL); | |
- return tim.tv_sec + (tim.tv_usec / 1000000.L); | |
-} | |
- | |
-int main(int argc, char **argv) { | |
- if (argc != 2) { | |
- std::fprintf(stderr, "Usage: %s FILE\nErase all bytes with value 255 " | |
- "from FILE. Write result on standard output.\n", argv[0]); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Open the input file. | |
- std::FILE *f = std::fopen(argv[1], "r"); | |
- if (f == NULL) { | |
- std::perror(argv[1]); | |
- std::exit(EXIT_FAILURE); | |
- } | |
- | |
- // Get the file size. | |
- std::fseek(f, 0L, SEEK_END); | |
- long size = std::ftell(f); | |
- std::rewind(f); | |
- | |
- // Allocate the buffer. | |
- static const long bufsize = (2L << 20); | |
- unsigned char *buffer = new unsigned char[bufsize]; | |
- | |
- // Do the filtering. | |
- long double start = wallclock(); | |
- std::size_t elems, count = 0, total = 0; | |
- while ((elems = std::fread(buffer, 1, bufsize, f)) > 0) { | |
- total += elems; | |
- count += elems; | |
- | |
- // Filter the buffer. | |
- std::size_t ptr = 0; | |
- for (std::size_t j = 0; j < elems; ++j) | |
- if (buffer[j] != 255) | |
- buffer[ptr++] = buffer[j]; | |
- | |
- // Write filtered buffer to stdout. | |
- if (ptr > 0) | |
- std::fwrite(buffer, 1, ptr, stdout); | |
- | |
- // Print progress message. | |
- if (count > (64L << 20)) { | |
- count = 0; | |
- long double elapsed = wallclock() - start; | |
- long double mib = (long double)total / (1L << 20); | |
- std::fprintf(stderr, "Processed %.0LfMiB (%.1Lf%%). Speed: %.2LfMiB/s.\r", | |
- mib, (100.L * total) / size, mib / elapsed); | |
- } | |
- } | |
- | |
- // Clean up. | |
- delete[] buffer; | |
- std::fclose(f); | |
- | |
- // Print summary. | |
- long double elapsed = wallclock() - start; | |
- long double mib = (long double)size / (1L << 20); | |
- std::fprintf(stderr, "Processed %.0LfMiB (100.0%%). Speed: %.2LfMiB/s.\n", | |
- mib, mib / elapsed); | |
-} | |
diff --git a/exttools/tools/CMakeLists.txt b/exttools/tools/CMakeLists.txt | |
deleted file mode 100644 | |
index 45d70375..00000000 | |
--- a/exttools/tools/CMakeLists.txt | |
+++ /dev/null | |
@@ -1,58 +0,0 @@ | |
-############################################################################ | |
-# CMakeLists.txt | |
-# | |
-# Part of a simple STXXL example. See http://stxxl.sourceforge.net | |
-# | |
-# Copyright (C) 2013 Timo Bingmann <tb@panthema.net> | |
-# | |
-# Distributed under the Boost Software License, Version 1.0. | |
-# (See accompanying file LICENSE_1_0.txt or copy at | |
-# http://www.boost.org/LICENSE_1_0.txt) | |
-############################################################################ | |
- | |
-# require cmake 2.6.4 (but please use 2.8.x) | |
-cmake_minimum_required(VERSION 2.6.4) | |
- | |
-# set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} /home/niki/opt) | |
-# set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} /home/niki/opt/lib/cmake/stxxl) | |
-# set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /home/niki/opt/lib) | |
-# set(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} /home/niki/opt/include) | |
-# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/home/niki/opt/include" ) | |
-# SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L/home/USERNAME/opt/lib" ) | |
- | |
-# we first give our project a name | |
-project(myproject) | |
-set(CXX_STANDARD c++11) | |
- | |
-# prohibit in-source builds | |
-if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}") | |
- message(SEND_ERROR "In-source builds are not allowed.") | |
-endif("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}") | |
- | |
- | |
-# search for stxxl-config.cmake which contains the library's configuration | |
-find_package(STXXL REQUIRED) | |
- | |
-# print some info (this can be removed) | |
-message(STATUS "STXXL_CXX_FLAGS: ${STXXL_CXX_FLAGS}") | |
-message(STATUS "STXXL_INCLUDE_DIRS: ${STXXL_INCLUDE_DIRS}") | |
-message(STATUS "STXXL_LIBRARIES: ${STXXL_LIBRARIES}") | |
- | |
-# apply CXXFLAGS to our configuration | |
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STXXL_CXX_FLAGS}") | |
- | |
-# enable warnings (always good) | |
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -W -Wall -O0 -ggdb") | |
- | |
-# add STXXL include directory | |
-include_directories(${STXXL_INCLUDE_DIRS}) | |
- | |
-# create and executable and linke with STXXL | |
-add_executable(isaandbwt isaandbwt.cpp) | |
-target_link_libraries(isaandbwt ${STXXL_LIBRARIES}) | |
- | |
-add_executable(readplcp readplcp.cpp) | |
-target_link_libraries(readplcp ${STXXL_LIBRARIES} /home/niki/opt/lib/libsdsl.a) | |
- | |
-add_executable(standardize standardize.cpp) | |
-target_link_libraries(standardize ${STXXL_LIBRARIES}) | |
diff --git a/exttools/tools/isaandbwt.cpp b/exttools/tools/isaandbwt.cpp | |
deleted file mode 100644 | |
index 1b22a327..00000000 | |
--- a/exttools/tools/isaandbwt.cpp | |
+++ /dev/null | |
@@ -1,221 +0,0 @@ | |
-/*************************************************************************** | |
- * test1.cpp | |
- * | |
- * Part of a simple STXXL example. See http://stxxl.sourceforge.net | |
- * | |
- * Copyright (C) 2013 Timo Bingmann <tb@panthema.net> | |
- * | |
- * Distributed under the Boost Software License, Version 1.0. | |
- * (See accompanying file LICENSE_1_0.txt or copy at | |
- * http://www.boost.org/LICENSE_1_0.txt) | |
- **************************************************************************/ | |
- | |
-#include <iostream> | |
-#include <limits> | |
- | |
-#include <stxxl/vector> | |
-#include <stxxl/random> | |
-#include <stxxl/sort> | |
-#include <stxxl/bits/algo/ksort.h> | |
-#include "/scripts/code/dcheck.hpp" | |
-// struct my_less_int : std::less<int> | |
-// { | |
-// int min_value() const { return std::numeric_limits<int>::min(); }; | |
-// int max_value() const { return std::numeric_limits<int>::max(); }; | |
-// }; | |
-// | |
-// int main(int argv,) | |
-// { | |
-// // create vector | |
-// stxxl::VECTOR_GENERATOR<int>::result vector; | |
-// | |
-// // fill vector with random integers | |
-// stxxl::random_number32 random; | |
-// | |
-// for (size_t i = 0; i < 100*1024*1024; ++i) { | |
-// vector.push_back(random()); | |
-// } | |
-// | |
-// // sort vector using 16 MiB RAM | |
-// stxxl::sort(vector.begin(), vector.end(), my_less_int(), 16*1024*1024); | |
-// | |
-// // output first and last items: | |
-// std::cout << vector.size() << " items sorted ranging from " | |
-// << vector.front() << " to " << vector.back() << std::endl; | |
-// | |
-// return 0; | |
-// } | |
- | |
- | |
- | |
-#include <iostream> | |
-#include <fstream> | |
-#include <stxxl/bits/common/uint_types.h> | |
- | |
-size_t filesize( const char*const filepath ){ | |
- std::ifstream file(filepath, std::ios::binary | std::ios::ate | std::ios::in); | |
- if(!file.good()) return 0; | |
- return file.tellg(); | |
-} | |
-bool file_exists(const char *const filepath) { | |
- std::ifstream infile(filepath); | |
- return infile.good(); | |
-} | |
- | |
- | |
-template<class int_t> | |
-class IntegerFileForwardIterator { | |
- const size_t m_size; | |
- std::ifstream m_is; | |
- size_t m_index; | |
- char m_buf[sizeof(int_t)]; | |
- public: | |
- | |
- IntegerFileForwardIterator(const char*const filename) | |
- : m_size { filesize(filename) } | |
- , m_is {filename, std::ios::binary | std::ios::in } | |
- , m_index {0} | |
- {} | |
- | |
- size_t size() const { return m_size/sizeof(int_t); } | |
- size_t index() const { return m_index; } | |
- int_t operator*() { return *reinterpret_cast<int_t*>(m_buf); } | |
- IntegerFileForwardIterator& operator++(int) { | |
- m_is.read(m_buf, sizeof(int_t)); | |
- ++m_index; | |
- return *this; | |
- } | |
-}; | |
- | |
-template<class int_t> | |
-class IntegerFileArray { | |
- const size_t m_size; | |
- std::ifstream m_is; | |
- public: | |
- IntegerFileArray(const char*const filename) | |
- : m_size { filesize(filename) } | |
- , m_is {filename, std::ios::binary | std::ios::in } | |
- {} | |
- int_t operator[](size_t i) { | |
-// DCHECK_LT(i, size()); | |
- m_is.seekg(i*sizeof(int_t), std::ios_base::beg); | |
- char buf[sizeof(int_t)]; | |
- m_is.read(buf, sizeof(int_t)); | |
- return *reinterpret_cast<int_t*>(buf); | |
- } | |
- size_t size() const { return m_size/sizeof(int_t); } | |
-}; | |
- | |
- | |
-using namespace stxxl; | |
-void bwt() { | |
- IntegerFileForwardIterator<uint40> sa { "/bighome/workspace/eSAIS/build/src/a.sa5" }; | |
- std::ifstream is("/bighome/workspace/eSAIS/build/src/a", std::ios::binary | std::ios::in); | |
- while(is) { | |
- | |
- } | |
- | |
-} | |
- | |
- // struct KeyExtractor { | |
- // typedef uint40 key_type; | |
- // typedef std::pair<uint40,uint40> value_type; | |
- // key_type m_key; | |
- // | |
- // KeyExtractor() {} | |
- // KeyExtractor(const key_type& k) : m_key(k) {} | |
- // key_type operator()(const value_type& v) const { return v.first; } | |
- // value_type min_value() const { return value_type(0,0); } | |
- // //value_type max_value() const { return std::make_pair(std::numeric_limits<key_type>::max(),0); } | |
- // value_type max_value() const { return value_type(m_key,0); } | |
- // }; | |
- | |
-template<class pair_t> | |
- struct KeyExtractor { | |
- typedef pair_t value_type; | |
- typedef typename pair_t::first_type key_type; | |
- key_type m_key; | |
- | |
- KeyExtractor() {} | |
- KeyExtractor(const key_type& k) : m_key(k) {} | |
- key_type operator()(const value_type& v) const { return v.first; } | |
- value_type min_value() const { return pair_t(0, (typename value_type::second_type)0); } | |
- //value_type max_value() const { return std::make_pair(std::numeric_limits<key_type>::max(),0); } | |
- value_type max_value() const { return pair_t(m_key, (typename value_type::second_type)0); } | |
- }; | |
- | |
-using namespace std; | |
-int main(int argc, char** argv) { | |
- if(argc != 2) { | |
- cout << "Usage: " << argv[0] << " text-file" << std::endl; | |
- return 1; | |
- } | |
- const std::string textfilename = argv[1]; | |
- const std::string safilename = textfilename + ".sa5"; | |
- const std::string isafilename = textfilename + ".isa5"; | |
- const std::string bwtfilename = textfilename + ".bwt"; | |
- if(!file_exists(textfilename.c_str())) { | |
- cout << "Could not open text file " << textfilename << std::endl; | |
- return 1; | |
- } | |
- if(!file_exists(safilename.c_str())) { | |
- cout << "Could not open SA file " << safilename << std::endl; | |
- return 1; | |
- } | |
- stxxl::VECTOR_GENERATOR<std::pair<uint40,uint40>>::result isa; // (text_position, factor_length) | |
- IntegerFileForwardIterator<uint40> safile { safilename.c_str() }; | |
- while(safile.index() < safile.size()) { | |
- uint40 index = static_cast<uint64>(safile.index()); | |
- isa.push_back(std::make_pair(*safile++,index)); | |
- } | |
- stxxl::ksort(isa.begin(), isa.end(), KeyExtractor<std::pair<uint40,uint40>>(isa.size()),512*1024*1024); //, STXXL_DEFAULT_ALLOC_STRATEGY()); | |
- std::ofstream isa_out(isafilename, std::ios::binary); | |
- for(auto it = isa.begin(); it != isa.end(); ++it) { | |
- isa_out.write((char*)(&it->second), sizeof(uint40)); | |
- } | |
- isa_out.close(); | |
- | |
- stxxl::VECTOR_GENERATOR<std::pair<uint40,char>>::result bwt; | |
- ifstream textfile(textfilename, ios::in | ios::binary); | |
- const uint40 isa_zero = isa.begin()->second; | |
- { | |
- auto it = isa.begin(); | |
- ++it; | |
- for(; it != isa.end(); ++it) { | |
- bwt.push_back(std::make_pair(it->second+1, textfile.get())); | |
- DCHECK(textfile.good()); | |
- } | |
- DCHECK(textfile.good()); | |
- bwt.push_back(std::make_pair(isa_zero,textfile.get())); | |
- DCHECK(textfile.good()); | |
- } | |
- stxxl::ksort(bwt.begin(), bwt.end(), KeyExtractor<std::pair<uint40,char>>(isa.size()),512*1024*1024); //, STXXL_DEFAULT_ALLOC_STRATEGY()); | |
- std::ofstream bwt_out(bwtfilename, std::ios::binary); | |
- for(auto it = bwt.begin(); it != bwt.end(); ++it) { | |
- if(it->second == 0) bwt_out.put(1); // TODO BUG: prevent writing the 0-byte by writing 1 | |
- else bwt_out.put(it->second); | |
- } | |
- | |
- | |
- // stxxl::VECTOR_GENERATOR<std::pair<uint40,uint40>>::result bwt; // (text_position, factor_length) | |
- // ifstream textfile(textfilename, ios::in | ios::binary); | |
- | |
- // | |
- // { | |
- // IntegerFileForwardIterator<uint40> sa { "/bighome/workspace/eSAIS/build/src/a.sa5" }; | |
- // IntegerFileForwardIterator<uint40> isa { "/bighome/workspace/eSAIS/build/src/a.isa5" }; | |
- // IntegerFileForwardIterator<uint40> plcp { "/bighome/workspace/eSAIS/build/src/a.plcp5" }; | |
- // while(sa.index() < sa.size()) { | |
- // std::cout << *sa++ << "," << *isa++ << "," << *plcp++ << endl; | |
- // } | |
- // } | |
- // std::cout << endl; | |
- // { | |
- // IntegerFileArray<uint40> sa { "/bighome/workspace/eSAIS/build/src/a.sa5" }; | |
- // IntegerFileArray<uint40> isa { "/bighome/workspace/eSAIS/build/src/a.isa5" }; | |
- // IntegerFileArray<uint40> plcp { "/bighome/workspace/eSAIS/build/src/a.plcp5" }; | |
- // for(size_t i = 0; i < sa.size(); ++i) { | |
- // std::cout << sa[i] << "," << isa[i] << "," << plcp[i] << endl; | |
- // } | |
- // } | |
-} | |
diff --git a/exttools/tools/readplcp.cpp b/exttools/tools/readplcp.cpp | |
deleted file mode 100644 | |
index c58a5ca4..00000000 | |
--- a/exttools/tools/readplcp.cpp | |
+++ /dev/null | |
@@ -1,153 +0,0 @@ | |
-/*************************************************************************** | |
- * test1.cpp | |
- * | |
- * Part of a simple STXXL example. See http://stxxl.sourceforge.net | |
- * | |
- * Copyright (C) 2013 Timo Bingmann <tb@panthema.net> | |
- * | |
- * Distributed under the Boost Software License, Version 1.0. | |
- * (See accompanying file LICENSE_1_0.txt or copy at | |
- * http://www.boost.org/LICENSE_1_0.txt) | |
- **************************************************************************/ | |
- | |
-#include <iostream> | |
-#include <limits> | |
- | |
-#include <stxxl/vector> | |
-#include <stxxl/random> | |
-#include <stxxl/sort> | |
-#include <stxxl/bits/algo/ksort.h> | |
-#include "/scripts/code/dcheck.hpp" | |
- | |
-#include <iostream> | |
-#include <fstream> | |
-#include <stxxl/bits/common/uint_types.h> | |
-#include "/home/niki/opt/include/sdsl/bits.hpp" | |
- | |
-size_t filesize( const char*const filepath ){ | |
- std::ifstream file(filepath, std::ios::binary | std::ios::ate | std::ios::in); | |
- if(!file.good()) return 0; | |
- return file.tellg(); | |
-} | |
-bool file_exists(const char *const filepath) { | |
- std::ifstream infile(filepath); | |
- return infile.good(); | |
-} | |
-typedef size_t len_t; | |
- | |
- | |
- | |
- | |
-class PLCPFileForwardIterator { | |
- std::ifstream m_is; | |
- | |
- uint64_t m_chunk = 0; // current data chunk | |
- len_t m_idx = 0; // current select parameter | |
- len_t m_block = 0; // block index | |
- len_t m_blockrank = 0; //number of ones up to previous block | |
- uint_fast8_t m_ones; // number of ones in the current block `m_block` | |
- | |
- void read_chunk() { | |
- m_is.read(reinterpret_cast<char*>(&m_chunk), sizeof(decltype(m_chunk))); | |
- m_ones = sdsl::bits::cnt(m_chunk); | |
- } | |
- | |
- public: | |
- static constexpr const len_t eof = -1; | |
- PLCPFileForwardIterator(const char* filepath) | |
- : m_is(filepath) | |
- { | |
- read_chunk(); | |
- } | |
- | |
- len_t index() const { return m_idx; } | |
- bool has_next() const { | |
- return m_is; | |
- } | |
- | |
- len_t next_select() { | |
- while(m_blockrank+m_ones < m_idx+1) { | |
- if(!m_is) {break;} | |
- ++m_block; | |
- m_blockrank += m_ones; | |
- read_chunk(); | |
- } | |
- return 64*m_block + sdsl::bits::sel(m_chunk, m_idx+1-m_blockrank); | |
- } | |
- len_t operator()() { | |
- const len_t ret = next_select() - 2*m_idx; | |
- return ret; | |
- } | |
- void advance() { | |
- ++m_idx; | |
- } | |
-}; | |
- | |
-template<class int_t> | |
-class IntegerFileForwardIterator { | |
- const size_t m_size; | |
- std::ifstream m_is; | |
- size_t m_index; | |
- char m_buf[sizeof(int_t)]; | |
- public: | |
- | |
- IntegerFileForwardIterator(const char*const filename) | |
- : m_size { filesize(filename) } | |
- , m_is {filename, std::ios::binary | std::ios::in } | |
- , m_index {0} | |
- {} | |
- | |
- size_t size() const { return m_size/sizeof(int_t); } | |
- size_t index() const { return m_index; } | |
- int_t operator*() { return *reinterpret_cast<int_t*>(m_buf); } | |
- IntegerFileForwardIterator& operator++(int) { | |
- m_is.read(m_buf, sizeof(int_t)); | |
- ++m_index; | |
- return *this; | |
- } | |
-}; | |
- | |
-template<class int_t> | |
-class IntegerFileArray { | |
- const size_t m_size; | |
- std::ifstream m_is; | |
- public: | |
- IntegerFileArray(const char*const filename) | |
- : m_size { filesize(filename) } | |
- , m_is {filename, std::ios::binary | std::ios::in } | |
- {} | |
- int_t operator[](size_t i) { | |
-// DCHECK_LT(i, size()); | |
- m_is.seekg(i*sizeof(int_t), std::ios_base::beg); | |
- char buf[sizeof(int_t)]; | |
- m_is.read(buf, sizeof(int_t)); | |
- return *reinterpret_cast<int_t*>(buf); | |
- } | |
- size_t size() const { return m_size/sizeof(int_t); } | |
-}; | |
- | |
- | |
-using namespace std; | |
-int main(int argc, char** argv) { | |
- if(argc != 2) { | |
- cout << "Usage: " << argv[0] << " text-file" << std::endl; | |
- return 1; | |
- } | |
- const std::string textfilename = argv[1]; | |
- const std::string plcpfilename = textfilename + ".plcp"; | |
- if(!file_exists(plcpfilename.c_str())) { | |
- cout << "Could not open text file " << textfilename << std::endl; | |
- return 1; | |
- } | |
- PLCPFileForwardIterator p(plcpfilename.c_str()); | |
- size_t i=0; | |
-// while(i < 100) { | |
- while(p.has_next()) { | |
- size_t entry = p(); | |
-// if(!p.has_next()) break; | |
- std::cout << i++ << "->" << entry << "->" << p.has_next() << endl; | |
- p.advance(); | |
- } | |
- | |
- | |
-} | |
diff --git a/exttools/tools/standardize.cpp b/exttools/tools/standardize.cpp | |
deleted file mode 100644 | |
index 2148db85..00000000 | |
--- a/exttools/tools/standardize.cpp | |
+++ /dev/null | |
@@ -1,30 +0,0 @@ | |
-#include <iostream> | |
-#include <fstream> | |
-using namespace std; | |
- | |
-size_t filesize( const char*const filepath ){ | |
- std::ifstream file(filepath, std::ios::binary | std::ios::ate | std::ios::in); | |
- if(!file.good()) return 0; | |
- return file.tellg(); | |
-} | |
-bool file_exists(const char *const filepath) { | |
- std::ifstream infile(filepath); | |
- return infile.good(); | |
-} | |
- | |
-int main(int argc, char** argv) { | |
- if(argc != 2) { | |
- cout << "Usage: " << argv[0] << " text-file" << std::endl; | |
- return 1; | |
- } | |
- const std::string textfilename = argv[1]; | |
- const std::string Ztextfilename = textfilename + ".0"; | |
- if(!file_exists(textfilename.c_str())) { | |
- cout << "Could not open text file " << textfilename << std::endl; | |
- return 1; | |
- } | |
- std::ifstream is(textfilename, std::ios::binary); | |
- std::ofstream os(Ztextfilename, std::ios::binary); | |
- os << is.rdbuf(); | |
- os.put(0); | |
-} | |
diff --git a/include/tudocomp/Coder.hpp b/include/tudocomp/Coder.hpp | |
index c66bf5f7..99ab41c4 100644 | |
--- a/include/tudocomp/Coder.hpp | |
+++ b/include/tudocomp/Coder.hpp | |
@@ -26,7 +26,7 @@ public: | |
/// \param out The bit stream to write to. | |
/// \param literals The literal iterator. | |
template<typename literals_t> | |
- inline Encoder(Env&& env, std::shared_ptr<BitOStream> out, literals_t&& literals) | |
+ inline Encoder(Env&& env, std::shared_ptr<BitOStream> out, literals_t&&) | |
: Algorithm(std::move(env)), m_out(out) { | |
} | |
@@ -69,7 +69,7 @@ public: | |
/// \param v The value to encode. | |
/// \param r Unused. | |
template<typename value_t> | |
- inline void encode(value_t v, const BitRange& r) { | |
+ inline void encode(value_t v, const BitRange&) { | |
m_out->write_bit(v); | |
} | |
@@ -134,7 +134,7 @@ public: | |
/// \param r Unused. | |
/// \return The decoded bit value (zero or one). | |
template<typename value_t> | |
- inline value_t decode(const BitRange& r) { | |
+ inline value_t decode(const BitRange&) { | |
return value_t(m_in->read_bit()); | |
} | |
diff --git a/include/tudocomp/Meta.hpp b/include/tudocomp/Meta.hpp | |
index ca1e75ef..4d926649 100644 | |
--- a/include/tudocomp/Meta.hpp | |
+++ b/include/tudocomp/Meta.hpp | |
@@ -91,6 +91,8 @@ public: | |
/// \tparam T The Algorithm type. | |
template<class T> | |
inline void templated(const std::string& accepted_type) { | |
+ (void) accepted_type; // TODO: Actual use this parameter | |
+ | |
m_meta.check_arg(m_argument_name); | |
Meta sub_meta = T::meta(); | |
m_meta.m_sub_metas.push_back(sub_meta); | |
diff --git a/include/tudocomp/coders/ASCIICoder.hpp b/include/tudocomp/coders/ASCIICoder.hpp | |
index fc9b2866..d22bbafb 100644 | |
--- a/include/tudocomp/coders/ASCIICoder.hpp | |
+++ b/include/tudocomp/coders/ASCIICoder.hpp | |
@@ -31,7 +31,7 @@ public: | |
using tdc::Encoder::Encoder; | |
template<typename value_t> | |
- inline void encode(value_t v, const Range& r) { | |
+ inline void encode(value_t v, const Range&) { | |
std::ostringstream s; | |
s << v; | |
for(uint8_t c : s.str()) m_out->write_int(c); | |
@@ -39,12 +39,12 @@ public: | |
} | |
template<typename value_t> | |
- inline void encode(value_t v, const LiteralRange& r) { | |
+ inline void encode(value_t v, const LiteralRange&) { | |
m_out->write_int(uint8_t(v)); | |
} | |
template<typename value_t> | |
- inline void encode(value_t v, const BitRange& r) { | |
+ inline void encode(value_t v, const BitRange&) { | |
m_out->write_int(v ? '1' : '0'); | |
} | |
}; | |
@@ -55,7 +55,7 @@ public: | |
using tdc::Decoder::Decoder; | |
template<typename value_t> | |
- inline value_t decode(const Range& r) { | |
+ inline value_t decode(const Range&) { | |
std::ostringstream os; | |
for(uint8_t c = m_in->read_int<uint8_t>(); | |
c >= '0' && c <= '9'; | |
@@ -73,12 +73,12 @@ public: | |
} | |
template<typename value_t> | |
- inline value_t decode(const LiteralRange& r) { | |
+ inline value_t decode(const LiteralRange&) { | |
return value_t(m_in->read_int<uint8_t>()); | |
} | |
template<typename value_t> | |
- inline value_t decode(const BitRange& r) { | |
+ inline value_t decode(const BitRange&) { | |
uint8_t b = m_in->read_int<uint8_t>(); | |
return (b != '0'); | |
} | |
diff --git a/include/tudocomp/coders/HuffmanCoder.hpp b/include/tudocomp/coders/HuffmanCoder.hpp | |
index 5538b13e..eddab809 100644 | |
--- a/include/tudocomp/coders/HuffmanCoder.hpp | |
+++ b/include/tudocomp/coders/HuffmanCoder.hpp | |
@@ -38,7 +38,7 @@ namespace huff { | |
std::memset(C, 0, sizeof(len_t)*(ULITERAL_MAX+1)); | |
while(input.has_next()) { | |
- literal_t c = input.next().c; | |
+ uliteral_t c = input.next().c; | |
DCHECK_LT(static_cast<uliteral_t>(c), ULITERAL_MAX+1); | |
DCHECK_LT(C[static_cast<uliteral_t>(c)], std::numeric_limits<len_t>::max()); | |
++C[static_cast<uliteral_t>(c)]; | |
@@ -325,7 +325,7 @@ namespace huff { | |
* Encodes a stream storing input_length characters | |
*/ | |
inline void huffman_encode( | |
- std::basic_istream<literal_t>& input, | |
+ std::istream& input, | |
tdc::io::BitOStream& os, | |
const size_t input_length, | |
const uint8_t*const ordered_map_from_effective, | |
@@ -339,7 +339,7 @@ namespace huff { | |
{//now writing | |
os.write_compressed_int<size_t>(input_length); | |
- literal_t c; | |
+ char c; | |
while(input.get(c)) { | |
huffman_encode(c, os, ordered_codelengths, ordered_map_to_effective, alphabet_size, codewords); | |
} | |
@@ -369,7 +369,7 @@ namespace huff { | |
DVLOG(2) << "prefix_sum_lengths : " << arr_to_debug_string(prefix_sum_lengths, longest); | |
return prefix_sum_lengths; | |
} | |
- inline literal_t huffman_decode( | |
+ inline uliteral_t huffman_decode( | |
tdc::io::BitIStream& is, | |
const uliteral_t*const ordered_map_from_effective, | |
const size_t*const prefix_sum_lengths, | |
@@ -394,7 +394,7 @@ namespace huff { | |
inline void huffman_decode( | |
tdc::io::BitIStream& is, | |
- std::basic_ostream<literal_t>& output, | |
+ std::ostream& output, | |
const uliteral_t*const ordered_map_from_effective, | |
const uint8_t*const ordered_codelengths, | |
const size_t alphabet_size, | |
diff --git a/include/tudocomp/compressors/MTFCompressor.hpp b/include/tudocomp/compressors/MTFCompressor.hpp | |
index 2b5ebed5..f65967a6 100644 | |
--- a/include/tudocomp/compressors/MTFCompressor.hpp | |
+++ b/include/tudocomp/compressors/MTFCompressor.hpp | |
@@ -42,7 +42,7 @@ value_type mtf_decode_char(const value_type v, value_type*const table) { | |
return return_value; | |
} | |
-template<class char_type = literal_t> | |
+template<class char_type = uliteral_t> | |
void mtf_encode(std::basic_istream<char_type>& is, std::basic_ostream<char_type>& os) { | |
typedef typename std::make_unsigned<char_type>::type value_type; // -> default: uint8_t | |
static constexpr size_t table_size = std::numeric_limits<value_type>::max()+1; | |
@@ -55,7 +55,7 @@ void mtf_encode(std::basic_istream<char_type>& is, std::basic_ostream<char_type> | |
} | |
} | |
-template<class char_type = literal_t> | |
+template<class char_type = uliteral_t> | |
void mtf_decode(std::basic_istream<char_type>& is, std::basic_ostream<char_type>& os) { | |
typedef typename std::make_unsigned<char_type>::type value_type; // -> default: uint8_t | |
static constexpr size_t table_size = std::numeric_limits<value_type>::max()+1; | |
@@ -66,7 +66,7 @@ void mtf_decode(std::basic_istream<char_type>& is, std::basic_ostream<char_type> | |
while(is.get(c)) { | |
os << mtf_decode_char(static_cast<value_type>(c), table); | |
} | |
-}; | |
+} | |
class MTFCompressor : public Compressor { | |
public: | |
diff --git a/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp b/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp | |
index 17b177c7..fdaec2e9 100644 | |
--- a/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp | |
+++ b/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp | |
@@ -25,8 +25,6 @@ size_t filesize( const char*const filepath ){ | |
return file.tellg(); | |
} | |
-#include <tudocomp_stat/StatPhase.hpp> | |
- | |
namespace tdc { | |
namespace lcpcomp { | |
@@ -242,7 +240,7 @@ class PLCPFileForwardIterator { | |
len_t index() const { return m_idx; } | |
bool has_next() const { | |
- return !m_is.fail(); | |
+ return m_is; | |
} | |
len_t next_select() { | |
@@ -268,6 +266,8 @@ class PLCPFileForwardIterator { | |
template<class RefStrategy,class plcp_type> | |
void compute_references(const size_t n, RefStrategy& refStrategy, plcp_type& pplcp, size_t threshold) { | |
+ env().end_stat_phase(); | |
+ env().begin_stat_phase("Search Peaks"); | |
struct Poi { | |
len_t pos; | |
@@ -386,7 +386,8 @@ class PLCPFileForwardIterator { | |
// DCHECK_EQ(plcp[lastpos], plcp_i); | |
lastpos_lcp = plcp_i; | |
} | |
- IF_STATS(StatPhase::log("max heap size", max_heap_size)); | |
+ IF_STATS(env().log_stat("max heap size", max_heap_size)); | |
+ env().end_stat_phase(); | |
} | |
@@ -426,15 +427,13 @@ public: | |
// refStrategy.factorize(refs); | |
// env().end_stat_phase(); | |
// } | |
- | |
- inline static ds::dsflags_t textds_flags() { | |
- return text_t::SA | text_t::ISA; | |
- } | |
- inline void factorize(text_t& text, size_t threshold, lzss::FactorBuffer& refs) { | |
- StatPhase phase("Load Index DS"); | |
+ inline void factorize(text_t& text, | |
+ size_t threshold, | |
+ lzss::FactorBuffer& refs) { | |
+ env().begin_stat_phase("Load index ds"); | |
// const std::string textfilename = "/bighome/workspace/compreSuite/tudocomp/datasets/abracadabra.0"; | |
- const std::string textfilename = "/local1/cc_commoncrawl.ascii.10MB.0"; | |
+ const std::string textfilename = "/bighome/workspace/compreSuite/tudocomp/datasets/cc_commoncrawl.ascii.10MB.0"; | |
IntegerFileArray<uint_t<40>> sa ((textfilename + ".sa5").c_str()); | |
IntegerFileArray<uint_t<40>> isa ((textfilename + ".isa5").c_str()); | |
// SAFileArray<uint_t<40>> sa((textfilename + ".sa5").c_str()); | |
@@ -442,177 +441,44 @@ public: | |
//IntegerFileForwardIterator<uint_t<40>> pplcp("/bighome/workspace/compreSuite/tudocomp/datasets/pc_english.200MB.plcp5"); | |
DCHECK_EQ(sa.size(), text.size()); | |
-IF_DEBUG( | |
- StatPhase::wrap("Check Index DS", [&]{ | |
- const auto& tsa = text.require_sa(); | |
- const auto& tisa = text.require_isa(); | |
- const auto& plcp = text.require_plcp(); | |
- PLCPFileForwardIterator pplcp ((textfilename + ".plcp").c_str()); | |
- for(size_t i = 0; i < sa.size(); ++i) { | |
- DCHECK_EQ(sa.size(),tsa.size()); | |
- DCHECK_EQ(sa[i], (uint64_t)tsa[i]); | |
- } | |
- DCHECK_EQ(isa.size(),tisa.size()); | |
- for(size_t i = 0; i < isa.size(); ++i) { | |
- DCHECK_EQ(isa[i], (uint64_t)tisa[i]); | |
- } | |
- for(size_t i = 0; i < plcp.size()-1; ++i) { | |
- DCHECK_EQ(pplcp(),(uint64_t) plcp[i]); | |
- pplcp.advance(); | |
- } | |
- }); | |
- ); | |
+//IF_DEBUG({ | |
+ { | |
+ env().begin_stat_phase("Construct index ds"); | |
+ text.require(text_t::SA | text_t::ISA | text_t::PLCP); | |
+ | |
+ const auto& tsa = text.require_sa(); | |
+ const auto& tisa = text.require_isa(); | |
+ const auto& plcp = text.require_plcp(); | |
+ PLCPFileForwardIterator pplcp ((textfilename + ".plcp").c_str()); | |
+ for(size_t i = 0; i < sa.size(); ++i) { | |
+ DCHECK_EQ(sa.size(),tsa.size()); | |
+ DCHECK_EQ(sa[i], (uint64_t)tsa[i]); | |
+ } | |
+ DCHECK_EQ(isa.size(),tisa.size()); | |
+ for(size_t i = 0; i < isa.size(); ++i) { | |
+ DCHECK_EQ(isa[i], (uint64_t)tisa[i]); | |
+ } | |
+ for(size_t i = 0; i < plcp.size()-1; ++i) { | |
+ DCHECK_EQ(pplcp(),(uint64_t) plcp[i]); | |
+ pplcp.advance(); | |
+ } | |
+ env().end_stat_phase(); | |
+ env().begin_stat_phase("Check"); | |
+ | |
+ | |
+ env().end_stat_phase(); | |
+ } | |
+// })//DEBUG | |
PLCPFileForwardIterator pplcp ((textfilename + ".plcp").c_str()); | |
RefDiskStrategy<decltype(sa),decltype(isa)> refStrategy(sa,isa); | |
- StatPhase::wrap("Search Peaks", [&]{ | |
- compute_references(text.size()-1, refStrategy, pplcp, threshold); | |
- }); | |
- StatPhase::wrap("Compute References", [&]{ | |
- refStrategy.factorize(refs); | |
- }); | |
+ compute_references(text.size()-1, refStrategy, pplcp, threshold); | |
+ env().begin_stat_phase("Compute References"); | |
+ refStrategy.factorize(refs); | |
+ env().end_stat_phase(); | |
- } | |
- // | |
- // inline void factorize(text_t& text, | |
- // size_t threshold, | |
- // lzss::FactorBuffer& factors) { | |
- // | |
- // // Construct SA, ISA and LCP | |
- // auto pplcp = StatPhase::wrap("Construct index ds", [&]{ | |
- // text.require(text_t::SA | text_t::ISA); | |
- // const auto& sa = text.require_sa(); | |
- // return LCPForwardIterator { (construct_plcp_bitvector(env(), sa, text)) }; | |
- // }); | |
- // | |
- // const auto& sa = text.require_sa(); | |
- // const auto& isa = text.require_isa(); | |
- // const len_t n = sa.size(); | |
- // | |
- // StatPhase::wrap("Search Peaks", [&]{ | |
- // | |
- // struct Poi { | |
- // len_t pos; | |
- // len_t lcp; | |
- // len_t no; | |
- // Poi(len_t _pos, len_t _lcp, len_t _no) : pos(_pos), lcp(_lcp), no(_no) {} | |
- // bool operator<(const Poi& o) const { | |
- // DCHECK_NE(o.pos, this->pos); | |
- // if(o.lcp == this->lcp) return this->pos > o.pos; | |
- // return this->lcp < o.lcp; | |
- // } | |
- // }; | |
- // | |
- // boost::heap::pairing_heap<Poi> heap; | |
- // std::vector<boost::heap::pairing_heap<Poi>::handle_type> handles; | |
- // | |
- // IF_STATS(len_t max_heap_size = 0); | |
- // | |
- // // std::stack<poi> pois; // text positions of interest, i.e., starting positions of factors we want to replace | |
- // | |
- // len_t lastpos = 0; | |
- // len_t lastpos_lcp = 0; | |
- // for(len_t i = 0; i+1 < n; ++i) { | |
- // while(pplcp.index() < i) pplcp.advance(); | |
- // const len_t plcp_i = pplcp(); DCHECK_EQ(pplcp.index(), i); | |
- // if(heap.empty()) { | |
- // if(plcp_i >= threshold) { | |
- // handles.emplace_back(heap.emplace(i, plcp_i, handles.size())); | |
- // lastpos = i; | |
- // lastpos_lcp = plcp_i; | |
- // } | |
- // continue; | |
- // } | |
- // if(i - lastpos >= lastpos_lcp || tdc_unlikely(i+1 == n)) { | |
- // IF_DEBUG(bool first = true); | |
- // IF_STATS(max_heap_size = std::max<len_t>(max_heap_size, heap.size())); | |
- // DCHECK_EQ(heap.size(), handles.size()); | |
- // while(!heap.empty()) { | |
- // const Poi& top = heap.top(); | |
- // const len_t source_position = sa[isa[top.pos]-1]; | |
- // factors.emplace_back(top.pos, source_position, top.lcp); | |
- // const len_t next_pos = top.pos; // store top, this is the current position that gets factorized | |
- // IF_DEBUG(if(first) DCHECK_EQ(top.pos, lastpos); first = false;) | |
- // | |
- // { | |
- // len_t newlcp_peak = 0; // a new peak can emerge at top.pos+top.lcp | |
- // bool peak_exists = false; | |
- // if(top.pos+top.lcp < i) | |
- // for(len_t j = top.no+1; j < handles.size(); ++j) { // erase all right peaks that got substituted | |
- // if( handles[j].node_ == nullptr) continue; | |
- // const Poi poi = *(handles[j]); | |
- // DCHECK_LT(next_pos, poi.pos); | |
- // if(poi.pos < next_pos+top.lcp) { | |
- // heap.erase(handles[j]); | |
- // handles[j].node_ = nullptr; | |
- // if(poi.lcp + poi.pos > next_pos+top.lcp) { | |
- // const len_t remaining_lcp = poi.lcp+poi.pos - (next_pos+top.lcp); | |
- // DCHECK_NE(remaining_lcp,0); | |
- // if(newlcp_peak != 0) DCHECK_LE(remaining_lcp, newlcp_peak); | |
- // newlcp_peak = std::max(remaining_lcp, newlcp_peak); | |
- // } | |
- // } else if( poi.pos == next_pos+top.lcp) { peak_exists=true; } | |
- // else { break; } // only for performance | |
- // } | |
- // #ifdef DEBUG | |
- // if(peak_exists) { //TODO: DEBUG | |
- // for(len_t j = top.no+1; j < handles.size(); ++j) { | |
- // if( handles[j].node_ == nullptr) continue; | |
- // const Poi& poi = *(handles[j]); | |
- // if(poi.pos == next_pos+top.lcp) { | |
- // DCHECK_LE(newlcp_peak, poi.lcp); | |
- // break; | |
- // } | |
- // } | |
- // } | |
- // #endif | |
- // if(!peak_exists && newlcp_peak >= threshold) { | |
- // len_t j = top.no+1; | |
- // DCHECK(handles[j].node_ == nullptr); | |
- // handles[j] = heap.emplace(next_pos+top.lcp, newlcp_peak, j); | |
- // } | |
- // | |
- // } | |
- // handles[top.no].node_ = nullptr; | |
- // heap.pop(); // top now gets erased | |
- // | |
- // for(auto it = handles.rbegin(); it != handles.rend(); ++it) { | |
- // if( (*it).node_ == nullptr) continue; | |
- // Poi& poi = (*(*it)); | |
- // if(poi.pos > next_pos) continue; | |
- // const len_t newlcp = next_pos - poi.pos; | |
- // if(newlcp < poi.lcp) { | |
- // if(newlcp < threshold) { | |
- // heap.erase(*it); | |
- // it->node_ = nullptr; | |
- // } else { | |
- // poi.lcp = newlcp; | |
- // heap.decrease(*it); | |
- // | |
- // } | |
- // } else { | |
- // break; | |
- // } | |
- // } | |
- // } | |
- // handles.clear(); | |
- // --i; | |
- // continue; | |
- // } | |
- // DCHECK_EQ(pplcp.index(), i); | |
- // DCHECK_EQ(plcp_i, pplcp()); | |
- // if(plcp_i <= lastpos_lcp) continue; | |
- // DCHECK_LE(threshold, plcp_i); | |
- // handles.emplace_back(heap.emplace(i,plcp_i, handles.size())); | |
- // lastpos = i; | |
- // // DCHECK_EQ(plcp[lastpos], plcp_i); | |
- // lastpos_lcp = plcp_i; | |
- // } | |
- // IF_STATS(StatPhase::log("max heap size", max_heap_size)); | |
- // | |
- // }); | |
- //} | |
+ } | |
}; | |
diff --git a/include/tudocomp/compressors/lz78/CedarTrie.hpp b/include/tudocomp/compressors/lz78/CedarTrie.hpp | |
index 6f0d23f8..af175910 100644 | |
--- a/include/tudocomp/compressors/lz78/CedarTrie.hpp | |
+++ b/include/tudocomp/compressors/lz78/CedarTrie.hpp | |
@@ -39,12 +39,10 @@ class LzwRootSearchPosMap { | |
std::array<CedarSearchPos, 256> m_array; | |
public: | |
inline CedarSearchPos get(uliteral_t c) { | |
- DCHECK(0 <= c); | |
DCHECK(c < m_array.size()); | |
return m_array[c]; | |
} | |
inline void set(uliteral_t c, CedarSearchPos v) { | |
- DCHECK(0 <= c); | |
DCHECK(c < m_array.size()); | |
m_array[c] = v; | |
} | |
@@ -181,7 +179,7 @@ public: | |
return m; | |
} | |
- CedarTrie(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t reserve = 0) | |
+ CedarTrie(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t = 0) | |
: Algorithm(std::move(env)) | |
, LZ78Trie(n, remaining_characters) | |
, m_trie(std::make_unique<cedar_t>()) {} | |
diff --git a/include/tudocomp/compressors/lz78/squeeze_node.hpp b/include/tudocomp/compressors/lz78/squeeze_node.hpp | |
index b8aef03d..2b13c7bf 100644 | |
--- a/include/tudocomp/compressors/lz78/squeeze_node.hpp | |
+++ b/include/tudocomp/compressors/lz78/squeeze_node.hpp | |
@@ -16,13 +16,13 @@ typedef uint_t<40> squeeze_node_t; // TODO: change this to bits_for(literal_t) + | |
#ifndef ALPHABET_BITS | |
- #define ALPHABET_BITS (sizeof(literal_t)*8) | |
+ #define ALPHABET_BITS (sizeof(uliteral_t)*8) | |
#endif //TODO alphabet_bits -> effective alphabet size | |
inline factorid_t get_id(squeeze_node_t data) { | |
return static_cast<uint64_t>(data)>>ALPHABET_BITS; | |
} | |
-inline literal_t get_letter(squeeze_node_t data) { | |
+inline uliteral_t get_letter(squeeze_node_t data) { | |
return static_cast<char>(static_cast<uint64_t>(data)) & 0xff; //TODO 0xff hard coded | |
} | |
inline squeeze_node_t create_node(factorid_t id, uliteral_t c) { | |
diff --git a/include/tudocomp/def.hpp b/include/tudocomp/def.hpp | |
index 8aa5a9c5..75f6b772 100644 | |
--- a/include/tudocomp/def.hpp | |
+++ b/include/tudocomp/def.hpp | |
@@ -65,30 +65,28 @@ namespace tdc { | |
constexpr size_t LEN_BITS = 8 * sizeof(len_t); | |
/// Type to represent signed single literals. | |
- typedef char literal_t; | |
- | |
- /// Type to represent unsigned single literals. | |
- typedef std::make_unsigned<literal_t>::type uliteral_t; | |
+ typedef uint8_t uliteral_t; | |
/// The maximum value of \ref uliteral_t. | |
constexpr size_t ULITERAL_MAX = std::numeric_limits<uliteral_t>::max(); | |
- /// Converts a literal to an unsigned integer value. | |
+ /// Converts a literal to an integer value as if unsigned. | |
/// | |
- /// \tparam the literal type. | |
+ /// \tparam T the integer type. | |
/// \param c the literal. | |
/// \return the corresponding unsigned integer value. | |
- template<class T> | |
- inline size_t literal2int(const T& c) { | |
- return static_cast<size_t>(c); | |
- } | |
+ template<typename T = size_t> | |
+ constexpr T literal2int(uliteral_t c) { | |
+ return std::make_unsigned_t<T>(c); | |
+ } | |
- /// Converts a signed literal to an unsigned integer value. | |
+ /// Converts an integer value to a literal as if unsigned. | |
/// | |
- /// \param c the literal. | |
- /// \return the corresponding unsigned integer value. | |
- template<> | |
- inline size_t literal2int(const literal_t& c) { | |
- return static_cast<size_t>(static_cast<uliteral_t>(c)); | |
- } | |
+ /// \tparam T the integer type. | |
+ /// \param c the integer value. | |
+ /// \return the corresponding literal. | |
+ template<typename T = size_t> | |
+ constexpr uliteral_t int2literal(const T& c) { | |
+ return std::make_unsigned_t<T>(c); | |
+ } | |
} | |
diff --git a/include/tudocomp/ds/BitPackingVector.hpp b/include/tudocomp/ds/BitPackingVector.hpp | |
index 2108ba6e..89109ead 100644 | |
--- a/include/tudocomp/ds/BitPackingVector.hpp | |
+++ b/include/tudocomp/ds/BitPackingVector.hpp | |
@@ -53,7 +53,7 @@ namespace int_vector { | |
m_vec(std::move(other.m_vec)), m_real_size(other.m_real_size) {} | |
inline uint8_t raw_width() const { return N; } | |
- inline void set_width_raw(uint8_t width) { } | |
+ inline void set_width_raw(uint8_t) { } | |
}; | |
diff --git a/include/tudocomp/ds/IntPtr.hpp b/include/tudocomp/ds/IntPtr.hpp | |
index 4b1a157a..74dffcce 100644 | |
--- a/include/tudocomp/ds/IntPtr.hpp | |
+++ b/include/tudocomp/ds/IntPtr.hpp | |
@@ -38,7 +38,7 @@ namespace tdc { | |
sdsl::bits::write_int(word, x, offset, len); | |
} | |
template<> | |
- inline void write_int<uint_t<1>>(uint64_t* word, uint64_t v, uint8_t o, const uint8_t len) { | |
+ inline void write_int<uint_t<1>>(uint64_t* word, uint64_t v, uint8_t o, const uint8_t) { | |
auto& p = *word; | |
const auto mask = uint64_t(1) << o; | |
@@ -53,7 +53,7 @@ namespace tdc { | |
} | |
template<> | |
- inline uint64_t read_int<uint_t<1>>(const uint64_t* word, uint8_t o, const uint8_t len) { | |
+ inline uint64_t read_int<uint_t<1>>(const uint64_t* word, uint8_t o, const uint8_t) { | |
const auto p = *word; | |
const auto mask = uint64_t(1) << o; | |
@@ -71,7 +71,7 @@ struct RefDispatch { | |
v, | |
self.m_ptr.m_bit_offset, | |
self.m_ptr.data_bit_size()); | |
- }; | |
+ } | |
template<class Ref, class R> | |
inline static R cast_for_op(const Ref& self) { | |
@@ -159,7 +159,7 @@ namespace int_vector { | |
private: | |
//const uint8_t m_bit_size; | |
public: | |
- Data(const DynamicIntValueType* ptr, uint8_t offset, uint8_t size): | |
+ Data(const DynamicIntValueType* ptr, uint8_t offset, uint8_t /*size*/): | |
m_ptr(ptr), m_bit_offset(offset) /*, m_bit_size(size)*/ {} | |
inline uint8_t data_bit_size() const { return N; } | |
inline Data data_offset_to(const DynamicIntValueType* ptr, uint8_t offset) const { | |
@@ -177,7 +177,7 @@ namespace int_vector { | |
private: | |
//const uint8_t m_bit_size; | |
public: | |
- Data(DynamicIntValueType* ptr, uint8_t offset, uint8_t size): | |
+ Data(DynamicIntValueType* ptr, uint8_t offset, uint8_t /*size*/): | |
m_ptr(ptr), m_bit_offset(offset) /*, m_bit_size(size)*/ {} | |
inline uint8_t data_bit_size() const { return N; } | |
inline Data data_offset_to(DynamicIntValueType* ptr, uint8_t offset) { | |
@@ -448,7 +448,7 @@ namespace int_vector { | |
template<class T> | |
inline IntRef<T>& IntRef<T>::operator=(const ConstIntRef<T>& other) { | |
return operator=(value_type(other)); | |
- }; | |
+ } | |
template<class T> | |
inline IntRef<T> IntPtr<T>::operator*() { | |
diff --git a/include/tudocomp/ds/LCPSada.hpp b/include/tudocomp/ds/LCPSada.hpp | |
index 33a3cdfa..6f935317 100644 | |
--- a/include/tudocomp/ds/LCPSada.hpp | |
+++ b/include/tudocomp/ds/LCPSada.hpp | |
@@ -174,7 +174,7 @@ inline static sdsl::bit_vector construct_plcp_bitvector(const plcp_t& plcp) { | |
} | |
template<class sa_t, class text_t, class select_t = sdsl::select_support_mcl<1,1>> | |
-sdsl::bit_vector construct_plcp_bitvector(Env& env, const sa_t& sa, const text_t& text) { | |
+sdsl::bit_vector construct_plcp_bitvector(Env&, const sa_t& sa, const text_t& text) { | |
typedef DynamicIntVector phi_t; | |
phi_t phi = StatPhase::wrap("Construct Phi Array", [&]{ | |
diff --git a/include/tudocomp/ds/dynamic_t.hpp b/include/tudocomp/ds/dynamic_t.hpp | |
index 180940aa..9007ef79 100644 | |
--- a/include/tudocomp/ds/dynamic_t.hpp | |
+++ b/include/tudocomp/ds/dynamic_t.hpp | |
@@ -14,7 +14,7 @@ struct DyntDispatch { | |
template<class Ref, class V> | |
inline static void assign(Ref& self, V v) { | |
self.m_data = v; | |
- }; | |
+ } | |
template<class Ref, class R> | |
inline static R cast_for_op(const Ref& self) { | |
diff --git a/include/tudocomp/ds/uint_t.hpp b/include/tudocomp/ds/uint_t.hpp | |
index a2fd364a..65cb8085 100644 | |
--- a/include/tudocomp/ds/uint_t.hpp | |
+++ b/include/tudocomp/ds/uint_t.hpp | |
@@ -17,7 +17,7 @@ struct UinttDispatch { | |
template<class Ref, class V> | |
inline static void assign(Ref& self, V v) { | |
self.m_data = v; | |
- }; | |
+ } | |
template<class Ref, class R> | |
inline static R cast_for_op(const Ref& self) { | |
diff --git a/include/tudocomp/io/InputSource.hpp b/include/tudocomp/io/InputSource.hpp | |
index 59676e26..a0a36682 100644 | |
--- a/include/tudocomp/io/InputSource.hpp | |
+++ b/include/tudocomp/io/InputSource.hpp | |
@@ -61,7 +61,7 @@ namespace tdc {namespace io { | |
&& lhs.m_view.size() == rhs.m_view.size() | |
&& lhs.m_path == rhs.m_path | |
&& lhs.m_stream == rhs.m_stream; | |
- }; | |
+ } | |
inline std::ostream& operator<<(std::ostream& o, const InputSource& v) { | |
if (v.is_view()) { | |
diff --git a/include/tudocomp/io/ViewStream.hpp b/include/tudocomp/io/ViewStream.hpp | |
index cb9f1657..1cca02ce 100644 | |
--- a/include/tudocomp/io/ViewStream.hpp | |
+++ b/include/tudocomp/io/ViewStream.hpp | |
@@ -28,6 +28,8 @@ class ViewStream { | |
virtual inline std::streampos seekpos(std::streampos sp, | |
std::ios_base::openmode which) override | |
{ | |
+ DCHECK(which == (std::ios_base::in | std::ios_base::out)); | |
+ | |
auto begin = eback(); | |
auto end = egptr(); | |
if ((size_t(begin) + sp) > size_t(end)) { | |
diff --git a/include/tudocomp/pre_header/GenericViewBase.hpp b/include/tudocomp/pre_header/GenericViewBase.hpp | |
index 946d8922..d5dab002 100644 | |
--- a/include/tudocomp/pre_header/GenericViewBase.hpp | |
+++ b/include/tudocomp/pre_header/GenericViewBase.hpp | |
@@ -58,7 +58,7 @@ protected: | |
} | |
} | |
- inline void debug_bound_check(size_t pos) const { | |
+ inline void debug_bound_check(size_t IF_DEBUG(pos)) const { | |
IF_DEBUG(bound_check(pos)); | |
} | |
diff --git a/include/tudocomp/util/Hash.hpp b/include/tudocomp/util/Hash.hpp | |
index 06420ef7..4fd38ffc 100644 | |
--- a/include/tudocomp/util/Hash.hpp | |
+++ b/include/tudocomp/util/Hash.hpp | |
@@ -393,7 +393,7 @@ class HashMap { | |
public: | |
IF_STATS( | |
size_t collisions() const { return m_collisions; } | |
- void collect_stats(Env& env) const { | |
+ void collect_stats(Env&) const { | |
StatPhase::log("collisions", collisions()); | |
StatPhase::log("table size", table_size()); | |
StatPhase::log("load factor", max_load_factor()); | |
@@ -534,7 +534,7 @@ class HashMap { | |
if(tdc_unlikely(table_size()*max_load_factor() < m_entries)) { | |
auto toinsert = std::make_pair(m_keys[tablepos], m_values[tablepos]); | |
- size_t expected_size = | |
+ size_t expected_size = | |
std::is_same<SizeManager,SizeManagerDirect>::value ? | |
(m_entries + 3.0/2.0*lz78_expected_number_of_remaining_elements(entries(),m_n,m_remaining_characters))/0.95 : | |
(m_entries + lz78_expected_number_of_remaining_elements(entries(),m_n,m_remaining_characters))/0.95; | |
diff --git a/include/tudocomp/util/divsufsort.hpp b/include/tudocomp/util/divsufsort.hpp | |
index 03384fbd..7caa19eb 100644 | |
--- a/include/tudocomp/util/divsufsort.hpp | |
+++ b/include/tudocomp/util/divsufsort.hpp | |
@@ -28,11 +28,11 @@ | |
#pragma once | |
-#include <tudocomp/util/divsufsort_def.hpp> | |
-#include <tudocomp/util/divsufsort_private.hpp> | |
-#include <tudocomp/util/divsufsort_ssort.hpp> | |
-#include <tudocomp/util/divsufsort_trsort.hpp> | |
-#include <tudocomp/util/divsufsort_bufwrapper.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_private.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_ssort.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_trsort.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp> | |
#include <tudocomp/ds/IntVector.hpp> | |
diff --git a/include/tudocomp/util/divsufsort_bufwrapper.hpp b/include/tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp | |
similarity index 97% | |
rename from include/tudocomp/util/divsufsort_bufwrapper.hpp | |
rename to include/tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp | |
index 069f0d9b..81653abe 100644 | |
--- a/include/tudocomp/util/divsufsort_bufwrapper.hpp | |
+++ b/include/tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp | |
@@ -1,6 +1,6 @@ | |
#pragma once | |
-#include <tudocomp/util/divsufsort_def.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp> | |
#include <tudocomp/ds/IntVector.hpp> | |
namespace tdc { | |
diff --git a/include/tudocomp/util/divsufsort_def.hpp b/include/tudocomp/util/divsufsort/divsufsort_def.hpp | |
similarity index 97% | |
rename from include/tudocomp/util/divsufsort_def.hpp | |
rename to include/tudocomp/util/divsufsort/divsufsort_def.hpp | |
index 9f93f9ad..78343944 100644 | |
--- a/include/tudocomp/util/divsufsort_def.hpp | |
+++ b/include/tudocomp/util/divsufsort/divsufsort_def.hpp | |
@@ -6,7 +6,7 @@ namespace tdc { | |
namespace libdivsufsort { | |
// core type definitions | |
-using saidx_t = std::make_signed<len_t>::type; | |
+using saidx_t = ssize_t; | |
using saint_t = int; | |
using sauchar_t = uliteral_t; | |
diff --git a/include/tudocomp/util/divsufsort_private.hpp b/include/tudocomp/util/divsufsort/divsufsort_private.hpp | |
similarity index 100% | |
rename from include/tudocomp/util/divsufsort_private.hpp | |
rename to include/tudocomp/util/divsufsort/divsufsort_private.hpp | |
diff --git a/include/tudocomp/util/divsufsort_ssort.hpp b/include/tudocomp/util/divsufsort/divsufsort_ssort.hpp | |
similarity index 99% | |
rename from include/tudocomp/util/divsufsort_ssort.hpp | |
rename to include/tudocomp/util/divsufsort/divsufsort_ssort.hpp | |
index b213146e..2bfa89e4 100644 | |
--- a/include/tudocomp/util/divsufsort_ssort.hpp | |
+++ b/include/tudocomp/util/divsufsort/divsufsort_ssort.hpp | |
@@ -28,8 +28,8 @@ | |
#pragma once | |
-#include <tudocomp/util/divsufsort_def.hpp> | |
-#include <tudocomp/util/divsufsort_private.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_private.hpp> | |
namespace tdc { | |
namespace libdivsufsort { | |
diff --git a/include/tudocomp/util/divsufsort_trsort.hpp b/include/tudocomp/util/divsufsort/divsufsort_trsort.hpp | |
similarity index 99% | |
rename from include/tudocomp/util/divsufsort_trsort.hpp | |
rename to include/tudocomp/util/divsufsort/divsufsort_trsort.hpp | |
index 1e4ddc33..f06b739b 100644 | |
--- a/include/tudocomp/util/divsufsort_trsort.hpp | |
+++ b/include/tudocomp/util/divsufsort/divsufsort_trsort.hpp | |
@@ -28,8 +28,8 @@ | |
#pragma once | |
-#include <tudocomp/util/divsufsort_def.hpp> | |
-#include <tudocomp/util/divsufsort_private.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp> | |
+#include <tudocomp/util/divsufsort/divsufsort_private.hpp> | |
namespace tdc { | |
namespace libdivsufsort { | |
diff --git a/include/tudocomp_stat/Json.hpp b/include/tudocomp_stat/Json.hpp | |
index 5818ad8e..1015e7d4 100644 | |
--- a/include/tudocomp_stat/Json.hpp | |
+++ b/include/tudocomp_stat/Json.hpp | |
@@ -53,7 +53,7 @@ public: | |
/// | |
/// \return the string representation of the contained value | |
virtual inline void str( | |
- std::ostream& s, unsigned int level = 0) const override { | |
+ std::ostream& s, unsigned int = 0) const override { | |
s << m_value; | |
} | |
@@ -64,7 +64,7 @@ const char quote_char = '\"'; | |
const std::string quote_escape = "\\\""; | |
template<> | |
-inline void TValue<char>::str(std::ostream& s, unsigned int level) const { | |
+inline void TValue<char>::str(std::ostream& s, unsigned int) const { | |
s << quote_char; | |
if(m_value == quote_char) { | |
@@ -89,10 +89,10 @@ inline TValue<std::string>::TValue(const std::string& value) { | |
m_value.replace(x, 1, quote_escape); | |
pos = x+2; | |
} | |
-}; | |
+} | |
template<> | |
-inline void TValue<std::string>::str(std::ostream& s, unsigned int level) const { | |
+inline void TValue<std::string>::str(std::ostream& s, unsigned int) const { | |
s << quote_char << m_value << quote_char; | |
} | |
/// \endcond | |
diff --git a/test/doc_snippets/coder_impl.cpp b/test/doc_snippets/coder_impl.cpp | |
index aae21d85..3fc302ae 100644 | |
--- a/test/doc_snippets/coder_impl.cpp | |
+++ b/test/doc_snippets/coder_impl.cpp | |
@@ -56,7 +56,7 @@ public: | |
} | |
template<typename value_t> | |
- inline void encode(value_t v, const BitRange& r) { | |
+ inline void encode(value_t v, const BitRange&) { | |
// Encode single bits as ASCII | |
m_out->write_int(v ? '1' : '0'); | |
} | |
@@ -82,7 +82,7 @@ public: | |
} | |
template<typename value_t> | |
- inline value_t decode(const BitRange& r) { | |
+ inline value_t decode(const BitRange&) { | |
// Decode an ASCII character and compare against '0' | |
uint8_t b = m_in->read_int<uint8_t>(); | |
return (b != '0'); | |
@@ -92,7 +92,7 @@ public: | |
TEST(doc_coder_impl, test) { | |
std::stringstream ss; | |
- | |
+ | |
Range r1(75, 125); | |
Range r2(699, 702); | |
diff --git a/test/doc_snippets/stats.cpp b/test/doc_snippets/stats.cpp | |
index fc1b1372..6d9f22ab 100644 | |
--- a/test/doc_snippets/stats.cpp | |
+++ b/test/doc_snippets/stats.cpp | |
@@ -54,7 +54,7 @@ TEST(stats, example) { | |
StatPhase::wrap("Phase 3", []{ | |
// Phase 3.1 yields a complex result | |
StatPhase sub_phase("Phase 3.1"); | |
- | |
+ | |
char* result_part_1 = new char[1024]; | |
char* result_part_2 = new char[2048]; | |
std::this_thread::sleep_for(std::chrono::milliseconds(40)); | |
@@ -76,7 +76,7 @@ TEST(stats, pause_resume) { | |
// Allocate memory, but only track mem2 | |
StatPhase::pause_tracking(); | |
char* mem1 = new char[1024]; | |
- StatPhase::resume_tracking(); | |
+ StatPhase::resume_tracking(); | |
char* mem2 = new char[2048]; | |
diff --git a/test/ds_tests.cpp b/test/ds_tests.cpp | |
index fb2495a0..c117dafa 100644 | |
--- a/test/ds_tests.cpp | |
+++ b/test/ds_tests.cpp | |
@@ -87,7 +87,7 @@ void test_sa(const std::string& str, textds_t& t) { | |
} | |
template<class textds_t> | |
-void test_isa(const std::string& str, textds_t& t) { | |
+void test_isa(const std::string&, textds_t& t) { | |
auto& isa = t.require_isa(); | |
auto& sa = t.require_sa(); //request afterwards! | |
diff --git a/test/generic_int_vector_tests.cpp b/test/generic_int_vector_tests.cpp | |
index 202d1b57..23e6c03f 100644 | |
--- a/test/generic_int_vector_tests.cpp | |
+++ b/test/generic_int_vector_tests.cpp | |
@@ -279,7 +279,7 @@ namespace tdc { | |
template<class Ref, class V> | |
inline static void assign(Ref& self, V v) { | |
*self.m_ptr = v; | |
- }; | |
+ } | |
template<class Ref, class R> | |
inline static R cast_for_op(const Ref& self) { | |
@@ -540,7 +540,7 @@ TEST(integer_base, bit_ops_assign) { | |
template<class T> struct tn { constexpr const static char* str = "unknown"; }; | |
template<> struct tn<uint32_t> { constexpr const static char* str = "uint32_t"; }; | |
template<> struct tn<uint64_t> { constexpr const static char* str = "uint64_t"; }; | |
-template<class T> std::string type_name(T t) { return tn<T>::str; } | |
+template<class T> std::string type_name(T) { return tn<T>::str; } | |
TEST(uint_t, b24) { | |
uint_t<24> v; | |
diff --git a/test/test/driver_util.hpp b/test/test/driver_util.hpp | |
index 7535c441..793a2985 100644 | |
--- a/test/test/driver_util.hpp | |
+++ b/test/test/driver_util.hpp | |
@@ -139,16 +139,16 @@ std::string format_escape(const std::string& s) { | |
} | |
struct Error { | |
- bool has_error; | |
- std::string test; | |
- std::string message; | |
- std::string compress_cmd; | |
- std::string compress_stdout; | |
- std::string decompress_cmd; | |
- std::string decompress_stdout; | |
- std::string text; | |
- std::string roundtrip_text; | |
- std::string algo; | |
+ bool has_error = false; | |
+ std::string test = ""; | |
+ std::string message = ""; | |
+ std::string compress_cmd = ""; | |
+ std::string compress_stdout = ""; | |
+ std::string decompress_cmd = ""; | |
+ std::string decompress_stdout = ""; | |
+ std::string text = ""; | |
+ std::string roundtrip_text = ""; | |
+ std::string algo = ""; | |
void print_error() { | |
auto& e = *this; | |
diff --git a/test/tudocomp_driver_tests.cpp b/test/tudocomp_driver_tests.cpp | |
index a17f101a..f553a23c 100644 | |
--- a/test/tudocomp_driver_tests.cpp | |
+++ b/test/tudocomp_driver_tests.cpp | |
@@ -158,7 +158,7 @@ TEST(Registry, dynamic_options) { | |
} | |
using Compressor::Compressor; | |
- inline virtual void decompress(Input& input, Output& output) {} | |
+ inline virtual void decompress(Input&, Output&) {} | |
inline virtual void compress(Input& input, Output& output) { | |
auto s = output.as_stream(); | |
diff --git a/test/tudocomp_tests.cpp b/test/tudocomp_tests.cpp | |
index 6207706d..eb303ef8 100644 | |
--- a/test/tudocomp_tests.cpp | |
+++ b/test/tudocomp_tests.cpp | |
@@ -342,7 +342,7 @@ namespace input_nte_matrix { | |
i_copy_strat(std::move(input), | |
expected_output, | |
- [](Input& i) {}, | |
+ [](Input&) {}, | |
i_out_compare); | |
} | |
@@ -1188,9 +1188,9 @@ struct MyCompressor: public Compressor { | |
Compressor(std::move(env)), | |
custom_data(std::move(s)) {} | |
- inline virtual void decompress(Input& input, Output& output) {} | |
+ inline virtual void decompress(Input&, Output&) {} | |
- inline virtual void compress(Input& input, Output& output) { | |
+ inline virtual void compress(Input&, Output& output) { | |
A a(env().env_for_option("sub")); | |
auto s = output.as_stream(); | |
s << "ok! " << custom_data << " " << env().option("dyn").as_string(); | |
@@ -1327,8 +1327,8 @@ struct EscapingComp: public Compressor { | |
using Compressor::Compressor; | |
- virtual void compress(Input& i, Output& o) {} | |
- virtual void decompress(Input& i, Output& o) {} | |
+ virtual void compress(Input&, Output&) {} | |
+ virtual void decompress(Input&, Output&) {} | |
}; | |
TEST(Escaping, option_value_direct) { | |
@@ -1430,10 +1430,10 @@ struct KeywordlessEvalOrderBug: public Compressor { | |
KeywordlessEvalOrderBug(Env&& env): Compressor(std::move(env)){} | |
- inline virtual void decompress(Input& input, Output& output) { | |
+ inline virtual void decompress(Input&, Output&) { | |
} | |
- inline virtual void compress(Input& input, Output& output) { | |
+ inline virtual void compress(Input&, Output&) { | |
auto a = env().option("sub1").as_algorithm(); | |
auto b = env().option("dyn").as_string(); | |
auto c = env().option("sub2").as_algorithm(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment