Skip to content

Instantly share code, notes, and snippets.

@Kimundi
Created May 30, 2017 14:24
Show Gist options
  • Save Kimundi/06c79383fa0887d302f5ae214e662907 to your computer and use it in GitHub Desktop.
Save Kimundi/06c79383fa0887d302f5ae214e662907 to your computer and use it in GitHub Desktop.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 85d1e817..e649c1b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,9 +36,10 @@ endif()
include(ExternalProject)
# More warnings and debug info
+# TODO: Add -Wextra -Wpedantic once they no longer break building glog
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++14 -Wall")
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=gnu++14 -DNDEBUG -march=native")
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=gnu++14 -O0 -ggdb -DDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -march=native")
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb -DDEBUG")
find_package(Boost)
diff --git a/docs/Documentation.md b/docs/Documentation.md
index 373f7021..393beec8 100644
--- a/docs/Documentation.md
+++ b/docs/Documentation.md
@@ -1004,7 +1004,7 @@ an ASCII encoding for single bits, as identified by the
~~~ {.cpp caption="coder_impl.cpp"}
template<typename value_t>
-inline void encode(value_t v, const BitRange& r) {
+inline void encode(value_t v, const BitRange&) {
// Encode single bits as ASCII
m_out->write_int(v ? '1' : '0');
}
@@ -1014,7 +1014,7 @@ The same idea works with decoding:
~~~ {.cpp caption="coder_impl.cpp"}
template<typename value_t>
-inline value_t decode(const BitRange& r) {
+inline value_t decode(const BitRange&) {
// Decode an ASCII character and compare against '0'
uint8_t b = m_in->read_int<uint8_t>();
return (b != '0');
@@ -1475,7 +1475,7 @@ in the following snippet:
// Allocate memory, but only track mem2
StatPhase::pause_tracking();
char* mem1 = new char[1024];
-StatPhase::resume_tracking();
+StatPhase::resume_tracking();
char* mem2 = new char[2048];
diff --git a/etc/genregistry.py b/etc/genregistry.py
index 8c4fa9e3..30426109 100755
--- a/etc/genregistry.py
+++ b/etc/genregistry.py
@@ -95,6 +95,7 @@ lcpc_buffer = [
("lcpcomp::ScanDec", "compressors/lcpcomp/decompress/ScanDec.hpp", []),
("lcpcomp::DecodeForwardQueueListBuffer", "compressors/lcpcomp/decompress/DecodeQueueListBuffer.hpp", []),
("lcpcomp::CompactDec", "compressors/lcpcomp/decompress/CompactDec.hpp", []),
+ ("lcpcomp::MyMapBuffer", "compressors/lcpcomp/decompress/MyMapBuffer.hpp", []),
("lcpcomp::MultimapBuffer", "compressors/lcpcomp/decompress/MultiMapBuffer.hpp", []),
]
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/AUTHORS b/exttools/EM-SuccinctIrreducible-0.1.0/AUTHORS
deleted file mode 100644
index ea0c897a..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/AUTHORS
+++ /dev/null
@@ -1,2 +0,0 @@
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
-Dominik Kempa <dominik.kempa (at) gmail.com>
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/LICENCE b/exttools/EM-SuccinctIrreducible-0.1.0/LICENCE
deleted file mode 100644
index f229d208..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/LICENCE
+++ /dev/null
@@ -1,24 +0,0 @@
-Copyright (C) 2016
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
-Dominik Kempa <dominik.kempa (at) gmail.com>
-
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following
-conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/README b/exttools/EM-SuccinctIrreducible-0.1.0/README
deleted file mode 100644
index dfe8eb77..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/README
+++ /dev/null
@@ -1,50 +0,0 @@
-EM-SuccinctIrreducible - external memory LCP array construction algorithm
-=========================================================================
-
-
-Description
------------
-
-This package contains implementation of the external memory LCP array
-construction algorithm called EM-SuccinctIrreducible. The algorithm is
-described in the paper
-
- Juha Karkkainen and Dominik Kempa,
- Faster External Memory LCP Array Construction.
- In Proc. 24th European Symposium on Algorithms (ESA), 2016.
-
-The latest version of EM-SuccinctIrreducible is available at:
- http://www.cs.helsinki.fi/group/pads/
-
-
-
-Compilation and usage
----------------------
-
-2. Compile EM-SuccinctIrreducible using the provided Makefile
-
- $ cd src
- $ make
-
-This will produce six executables that allow computing the (P)LCP array
-of a given file sequentially and in parallel. For usage, run the
-programs without any arguments. EM-SuccinctIrreducible relies on the
-prior computation of suffix array and BWT for the input text. The
-suffix array for big files can be computed e.g. using the pSAscan
-algorithm, see https://www.cs.helsinki.fi/group/pads/pSAscan.html for
-more details.
-
-
-
-Terms of use
-------------
-
-EM-SuccinctIrreducible is released under the MIT/X11 license. See the
-file LICENCE for more details.
-
-If you use this code, please cite the paper mentioned above and publish
-the URL from which you downloaded the code.
-
-
-
-Helsinki, Aug 2016.
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/VERSION b/exttools/EM-SuccinctIrreducible-0.1.0/VERSION
deleted file mode 100644
index 6e8bf73a..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.1.0
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/Makefile b/exttools/EM-SuccinctIrreducible-0.1.0/src/Makefile
deleted file mode 100644
index 4d668fde..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/Makefile
+++ /dev/null
@@ -1,37 +0,0 @@
-SHELL = /bin/sh
-CC = $(CXX)
-CFLAGS = -Wall -Wextra -pedantic -Wshadow -funroll-loops -std=c++0x -O3 -DNDEBUG -pthread -march=native
-AUX_PAR_FLAGS = -fopenmp
-#CFLAGS = -Wall -Wextra -pedantic -Wshadow -std=c++0x -g2 -O3 -pthread
-
-all: construct_lcp_sequential \
- construct_lcp_parallel \
- construct_plcp_sequential \
- construct_plcp_parallel \
- construct_lcp_from_plcp_sequential \
- construct_lcp_from_plcp_parallel
-
-construct_lcp_sequential:
- $(CC) $(CFLAGS) -o construct_lcp_sequential em_succinct_irreducible_src/utils.cpp main_construct_lcp_array.cpp
-construct_lcp_parallel:
- $(CC) $(CFLAGS) -o construct_lcp_parallel em_succinct_irreducible_src/utils.cpp main_construct_lcp_array.cpp $(AUX_PAR_FLAGS)
-construct_plcp_sequential:
- $(CC) $(CFLAGS) -o construct_plcp_sequential em_succinct_irreducible_src/utils.cpp main_construct_plcp_bitvector.cpp
-construct_plcp_parallel:
- $(CC) $(CFLAGS) -o construct_plcp_parallel em_succinct_irreducible_src/utils.cpp main_construct_plcp_bitvector.cpp $(AUX_PAR_FLAGS)
-construct_lcp_from_plcp_sequential:
- $(CC) $(CFLAGS) -o construct_lcp_from_plcp_sequential em_succinct_irreducible_src/utils.cpp main_construct_lcp_from_plcp.cpp
-construct_lcp_from_plcp_parallel:
- $(CC) $(CFLAGS) -o construct_lcp_from_plcp_parallel em_succinct_irreducible_src/utils.cpp main_construct_lcp_from_plcp.cpp $(AUX_PAR_FLAGS)
-
-clean:
- /bin/rm -f *.o
-
-nuclear:
- /bin/rm -f construct_lcp_sequential \
- construct_lcp_parallel \
- construct_plcp_sequential \
- construct_plcp_parallel \
- construct_lcp_from_plcp_sequential \
- construct_lcp_from_plcp_parallel \
- *.o
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_B.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_B.hpp
deleted file mode 100644
index 1b17be1e..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_B.hpp
+++ /dev/null
@@ -1,500 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/compute_B.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_B_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_B_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <omp.h>
-
-#include "io/async_stream_reader.hpp"
-
-#include "set_bits.hpp"
-#include "utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename ext_text_offset_type>
-void compute_B(std::uint64_t text_length, std::uint64_t *B,
- std::string irreducible_bits_filename, std::string C_filename,
- std::uint64_t phi_undefined_position, std::uint64_t &total_io_volume) {
- fprintf(stderr, " Compute bitvector encoding of PLCP array: ");
- long double start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Fill in the bits in B corresponding
- // to irreducible lcp values.
- {
- // Initialize reader of irreducible positions.
- typedef async_stream_reader<ext_text_offset_type> irreducible_bits_reader_type;
- irreducible_bits_reader_type *irreducible_bits_reader =
- new irreducible_bits_reader_type(irreducible_bits_filename);
-
- // Allocate the buffer.
- static const std::uint64_t buffer_size = (1UL << 20);
- ext_text_offset_type *buf = new ext_text_offset_type[buffer_size];
-#ifdef _OPENMP
- ext_text_offset_type *tempbuf = new ext_text_offset_type[buffer_size];
-#endif
-
- // Stream and set bits inside B.
- std::uint64_t count = utils::file_size(irreducible_bits_filename) / sizeof(ext_text_offset_type);
- {
- std::uint64_t items_processed = 0;
- while (items_processed < count) {
- std::uint64_t filled = std::min(count - items_processed, buffer_size);
- irreducible_bits_reader->read(buf, filled);
-#ifdef _OPENMP
- set_bits(B, 2UL * text_length, buf, filled, tempbuf);
-#else
- for (std::uint64_t j = 0; j < filled; ++j) {
- std::uint64_t idx = buf[j];
- B[idx >> 6] |= (1UL << (idx & 63));
- }
-#endif
- items_processed += filled;
- }
- }
-
- // Special case.
- {
- std::uint64_t idx = 2 * phi_undefined_position;
- B[idx >> 6] |= (1UL << (idx & 63));
- }
-
- // Update I/O volume.
- io_volume += irreducible_bits_reader->bytes_read();
-
- // Clean up.
-#ifdef _OPENMP
- delete[] tempbuf;
-#endif
- delete[] buf;
- delete irreducible_bits_reader;
- utils::file_delete(irreducible_bits_filename);
- }
-
- // Fill in reducible LCP values.
- {
- // Initialize reader of C.
- typedef async_stream_reader<std::uint64_t> C_reader_type;
- C_reader_type *C_reader = new C_reader_type(C_filename);
-
- // Initialize the bit-buffer for reader of C.
- std::uint64_t bitbuf = C_reader->read();
- std::uint64_t bitbuf_pos = 0;
- bool C_bit = (bitbuf & (1UL << (bitbuf_pos++)));
-
- // Add reducible bits.
- for (std::uint64_t j = 0; j < 2UL * text_length; ++j) {
- // Set the bit in B.
- if (C_bit == 0)
- B[j >> 6] |= (1UL << (j & 63));
-
- // Read the next bit from C.
- if (B[j >> 6] & (1UL << (j & 63))) {
- if (bitbuf_pos < 64 || C_reader->empty() == false) {
- if (bitbuf_pos == 64) {
- bitbuf = C_reader->read();
- bitbuf_pos = 0;
- }
- C_bit = (bitbuf & (1UL << (bitbuf_pos++)));
- }
- }
- }
-
- // Update I/O volume.
- io_volume += C_reader->bytes_read();
-
- // Clean up.
- delete C_reader;
- utils::file_delete(C_filename);
- }
-
- // Update I/O volume.
- total_io_volume += io_volume;
-
- // Print summary.
- long double elapsed = utils::wclock() - start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n",
- elapsed, ((1.L * io_volume) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length);
-}
-
-template<typename ext_text_offset_type>
-void compute_B(std::uint64_t text_length, std::uint64_t max_block_size_B,
- std::uint64_t phi_undefined_position, std::string B_filename, std::string C_filename,
- std::string *irreducible_bits_filenames, std::uint64_t &total_io_volume) {
- std::uint64_t n_blocks_B = (2UL * text_length + max_block_size_B - 1) / max_block_size_B;
-
- fprintf(stderr, " Compute bitvector encoding of PLCP array: ");
- long double start = utils::wclock();
-
- // Initialize reader of C.
- typedef async_stream_reader<std::uint64_t> C_reader_type;
- C_reader_type *C_reader = new C_reader_type(C_filename);
-
- // Initialize the bit-buffer for reader of C.
- std::uint64_t bitbuf = C_reader->read();
- std::uint64_t bitbuf_pos = 0;
- bool C_bit = (bitbuf & (1UL << (bitbuf_pos++)));
-
- std::uint64_t io_vol = 0;
- std::uint64_t max_block_size_B_in_words = max_block_size_B / 64;
- std::uint64_t *B = new std::uint64_t[max_block_size_B_in_words];
- std::FILE *f = utils::file_open(B_filename, "w");
-
- // Allocate the buffer.
- static const std::uint64_t buffer_size = (1UL << 20);
- ext_text_offset_type *buf = new ext_text_offset_type[buffer_size];
-#ifdef _OPENMP
- ext_text_offset_type *tempbuf = new ext_text_offset_type[buffer_size];
-#endif
-
- for (std::uint64_t block_id = 0; block_id < n_blocks_B; ++block_id) {
- std::uint64_t block_beg = block_id * max_block_size_B;
- std::uint64_t block_end = std::min(block_beg + max_block_size_B, 2 * text_length);
- std::uint64_t block_size = block_end - block_beg;
- std::uint64_t block_size_in_words = (block_size + 63) / 64;
-
- // Zero-initialize the block of B.
- std::fill(B, B + block_size_in_words, 0UL);
-
- // Initialize the reader of irreducible positions.
- typedef async_stream_reader<ext_text_offset_type> irreducible_bits_reader_type;
- irreducible_bits_reader_type *irreducible_bits_reader =
- new irreducible_bits_reader_type(irreducible_bits_filenames[block_id]);
-
- // Read and set the bits in the block of B.
- std::uint64_t count = utils::file_size(irreducible_bits_filenames[block_id]) / sizeof(ext_text_offset_type);
- {
- std::uint64_t items_processed = 0;
- while (items_processed < count) {
- std::uint64_t filled = std::min(count - items_processed, buffer_size);
- irreducible_bits_reader->read(buf, filled);
-#ifdef _OPENMP
- #pragma omp parallel for
- for (std::uint64_t j = 0; j < filled; ++j)
- buf[j] = (std::uint64_t)buf[j] - block_beg;
-
- set_bits(B, block_size, buf, filled, tempbuf);
-#else
- for (std::uint64_t j = 0; j < filled; ++j) {
- std::uint64_t idx = buf[j];
- std::uint64_t offset = idx - block_beg;
- B[offset >> 6] |= (1UL << (offset & 63));
- }
-#endif
-
- items_processed += filled;
- }
- }
-
- // Special case for 1-bit corresponding to PLCP[SA[0]].
- if (block_beg <= 2 * phi_undefined_position && 2 * phi_undefined_position < block_end) {
- std::uint64_t offset = 2 * phi_undefined_position - block_beg;
- B[offset >> 6] |= (1UL << (offset & 63));
- }
-
- // Add reducible bits.
- for (std::uint64_t j = 0; j < block_size; ++j) {
- // Set the bit in B.
- if (C_bit == 0)
- B[j >> 6] |= (1UL << (j & 63));
-
- // Read the next bit from C.
- if (B[j >> 6] & (1UL << (j & 63))) {
- if (bitbuf_pos < 64 || C_reader->empty() == false) {
- if (bitbuf_pos == 64) {
- bitbuf = C_reader->read();
- bitbuf_pos = 0;
- }
- C_bit = (bitbuf & (1UL << (bitbuf_pos++)));
- }
- }
- }
-
- // Write current block of B to file.
- utils::write_to_file(B, block_size_in_words, f);
-
- // Update I/O volume.
- io_vol += irreducible_bits_reader->bytes_read() + block_size_in_words * sizeof(std::uint64_t);
-
- // Clean up.
- delete irreducible_bits_reader;
- utils::file_delete(irreducible_bits_filenames[block_id]);
- }
-
- // Update I/O volume.
- io_vol += C_reader->bytes_read();
- total_io_volume += io_vol;
-
- // Clean up.
-#ifdef _OPENMP
- delete[] tempbuf;
-#endif
- delete[] buf;
- delete[] B;
- delete C_reader;
- std::fclose(f);
- utils::file_delete(C_filename);
-
- // Print summary.
- long double elapsed = utils::wclock() - start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", elapsed,
- ((1.L * io_vol) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length);
-}
-
-template<typename text_offset_type>
-std::uint64_t *compute_B(std::uint64_t text_length, std::string text_filename,
- std::string sa_filename, std::uint64_t &n_irreducible_lcps,
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) {
- // Initialize basic parameters.
- std::uint64_t local_n_irreducible_lcps = 0;
- std::uint64_t local_sum_irreducible_lcps = 0;
-
- // Allocate bitvectors.
- std::uint64_t C_size_in_words = (text_length + 63) / 64;
- std::uint64_t B_size_in_words = (2UL * text_length + 63) / 64;
- std::uint64_t *C = new std::uint64_t[C_size_in_words];
- std::uint64_t *B = new std::uint64_t[B_size_in_words];
- std::fill(C, C + C_size_in_words, 0UL);
- std::fill(B, B + B_size_in_words, 0UL);
-
- // Read text.
- std::uint8_t *text = new std::uint8_t[text_length];
- {
- // Start the timer.
- fprintf(stderr, " Read text: ");
- long double read_start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Read data.
- utils::read_from_file(text, text_length, text_filename);
-
- // Update I/O volume.
- io_volume += text_length;
- total_io_volume += io_volume;
-
- // Print summary.
- long double read_time = utils::wclock() - read_start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", read_time,
- ((1.L * io_volume) / (1L << 20)) / read_time, (1.L * total_io_volume) / text_length);
- }
-
- // Compute irreducible lcp values.
- {
- // Start the timer.
- fprintf(stderr, " Compute irreducible LCP values: ");
- long double compute_irr_lcp_start = utils::wclock();
-
- // Initialize basic statistics.
- std::uint64_t io_volume = 0;
-
- // Initialize SA reader.
- typedef async_stream_reader<text_offset_type> sa_reader_type;
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename);
-
- // Allocate buffers.
- static const std::uint64_t buf_size = (1UL << 20);
- text_offset_type *sa_buf = new text_offset_type[buf_size];
- std::uint8_t *bwt_buf = new std::uint8_t[buf_size];
-#ifdef _OPENMP
- std::uint64_t *pair_buf = new std::uint64_t[buf_size * 2];
- std::uint64_t *ans_buf_B = new std::uint64_t[buf_size];
- std::uint64_t *ans_buf_C = new std::uint64_t[buf_size];
-#endif
-
- // Processing of SA follows.
- std::uint64_t sa_items_read = 0;
- std::uint64_t prev_sa = text_length;
- std::uint8_t prev_bwt = 0;
- while (sa_items_read < text_length) {
- std::uint64_t buf_filled = std::min(buf_size, text_length - sa_items_read);
- sa_reader->read(sa_buf, buf_filled);
-
- // Compute BWT buffer.
-#ifdef _OPENMP
- #pragma omp parallel for
- for (std::uint64_t j = 0; j < buf_filled; ++j) {
- std::uint64_t addr = (std::uint64_t)sa_buf[j];
- if (addr > 0) bwt_buf[j] = text[addr - 1];
- }
-#else
- for (std::uint64_t j = 0; j < buf_filled; ++j) {
- std::uint64_t addr = (std::uint64_t)sa_buf[j];
- if (addr > 0) bwt_buf[j] = text[addr - 1];
- }
-#endif
-
- // Process buffer.
-#ifdef _OPENMP
- {
- // Bring the irreducible pairs together.
- std::uint64_t buf_irr_filled = 0;
- for (std::uint64_t j = 0; j < buf_filled; ++j) {
- std::uint64_t cur_sa = (std::uint64_t)sa_buf[j];
- std::uint8_t cur_bwt = bwt_buf[j];
- if ((sa_items_read == 0 && j == 0) || (cur_sa == 0) || (prev_sa == 0) || (cur_bwt != prev_bwt)) {
- pair_buf[2 * buf_irr_filled] = cur_sa;
- pair_buf[2 * buf_irr_filled + 1] = prev_sa;
- ++buf_irr_filled;
- }
- prev_sa = cur_sa;
- prev_bwt = cur_bwt;
- }
-
- // Update statistics.
- local_n_irreducible_lcps += buf_irr_filled;
-
- if (buf_irr_filled > 0) {
- // Compute lcp values in parallel.
- #pragma omp parallel
- {
- std::uint64_t thread_sum_irreducible_lcps = 0;
-
- #pragma omp for nowait
- for (std::uint64_t j = 0; j < buf_irr_filled; ++j) {
- std::uint64_t i = pair_buf[2 * j];
- std::uint64_t phi_i = pair_buf[2 * j + 1];
- std::uint64_t lcp = 0;
- while (i + lcp < text_length && phi_i + lcp < text_length &&
- text[i + lcp] == text[phi_i + lcp]) ++lcp;
- thread_sum_irreducible_lcps += lcp;
- ans_buf_C[j] = i;
- ans_buf_B[j] = 2 * i + lcp;
- }
-
- #pragma omp critical
- {
- local_sum_irreducible_lcps += thread_sum_irreducible_lcps;
- }
- }
-
- // Set the bits in B and C in parallel.
- set_bits(B, 2UL * text_length, ans_buf_B, buf_irr_filled, pair_buf);
- set_bits(C, 1UL * text_length, ans_buf_C, buf_irr_filled, pair_buf);
- }
- }
-#else
- for (std::uint64_t j = 0; j < buf_filled; ++j) {
- std::uint64_t cur_sa = (std::uint64_t)sa_buf[j];
- std::uint8_t cur_bwt = bwt_buf[j];
- if ((sa_items_read == 0 && j == 0) || (cur_sa == 0) || (prev_sa == 0) || (cur_bwt != prev_bwt)) {
- // Compute irreducible lcp(cur_sa, prev_sa) naively.
- std::uint64_t lcp = 0;
- while (cur_sa + lcp < text_length && prev_sa + lcp < text_length &&
- text[cur_sa + lcp] == text[prev_sa + lcp]) ++lcp;
-
- // Set the corresponding bits in the B and C.
- std::uint64_t bv_idx = 2UL * cur_sa + lcp;
- B[bv_idx >> 6] |= (1UL << (bv_idx & 63));
- C[cur_sa >> 6] |= (1UL << (cur_sa & 63));
-
- // Update statistics.
- ++local_n_irreducible_lcps;
- local_sum_irreducible_lcps += lcp;
- }
-
- prev_sa = cur_sa;
- prev_bwt = cur_bwt;
- }
-#endif
-
- sa_items_read += buf_filled;
- }
-
- // Update I/O volume.
- io_volume += sa_reader->bytes_read();
- total_io_volume += io_volume;
-
- // Clean up.
- delete[] sa_buf;
- delete[] bwt_buf;
- delete sa_reader;
-#ifdef _OPENMP
- delete[] pair_buf;
- delete[] ans_buf_B;
- delete[] ans_buf_C;
-#endif
-
- // Print summary.
- long double compute_irr_lcp_time = utils::wclock() - compute_irr_lcp_start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB, total I/O vol = %.2Lfn\n", compute_irr_lcp_time,
- ((1.L * io_volume) / (1L << 20)) / compute_irr_lcp_time, (1.L * total_io_volume) / text_length);
- }
-
- // Clean up.
- delete[] text;
-
- // Fill in reducible LCP values.
- {
- fprintf(stderr, " Fill missing reducible LCP values: ");
- long double fill_in_reduc_start = utils::wclock();
-
- std::uint64_t B_ptr = 0;
- for (std::uint64_t j = 0; j < text_length; ++j) {
- if ((C[j >> 6] & (1UL << (j & 63))) == 0) {
- // Mark the 1-bit corresponding to reducible LCP value.
- B[B_ptr >> 6] |= (1UL << (B_ptr & 63));
- } else {
- // Find the next 1-bit in B.
- while ((B[B_ptr >> 6] & (1UL << (B_ptr & 63))) == 0)
- ++B_ptr;
- }
- ++B_ptr;
- }
-
- // Print summary.
- long double fill_in_reduc_time = utils::wclock() - fill_in_reduc_start;
- fprintf(stderr, "time = %.2Lfs\n", fill_in_reduc_time);
- }
-
- // Clean up.
- delete[] C;
-
- // Update reference variables.
- n_irreducible_lcps = local_n_irreducible_lcps;
- sum_irreducible_lcps = local_sum_irreducible_lcps;
-
- // Return the pointer to B.
- return B;
-}
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_B_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_array.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_array.hpp
deleted file mode 100644
index a1c7e0ec..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_array.hpp
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/compute_lcp_array.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_ARRAY_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_ARRAY_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstdint>
-#include <ctime>
-#include <string>
-#include <limits>
-#include <algorithm>
-#include <unistd.h>
-
-#include "compute_plcp_bitvector.hpp"
-#include "compute_lcp_from_plcp.hpp"
-#include "utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename text_offset_type, typename ext_text_offset_type>
-void compute_lcp_array(std::uint64_t text_length, std::uint64_t ram_use,
- std::string text_filename, std::string sa_filename, std::string bwt_filename,
- std::string output_filename, std::uint64_t &max_lcp, std::uint64_t &lcp_sum,
- std::uint64_t &n_irreducible_lcps, std::uint64_t &sum_irreducible_lcps,
- std::uint64_t &total_io_volume) {
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use;
- if (text_to_ram_ratio > 4.0L) {
- // Not enough RAM to hold B in RAM.
- std::string B_filename = output_filename + ".plcp." + utils::random_string_hash();
- compute_plcp_bitvector_small_ram<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename,
- sa_filename, bwt_filename, B_filename, n_irreducible_lcps, sum_irreducible_lcps, total_io_volume);
-
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, sa_filename,
- output_filename, B_filename, total_io_volume, max_lcp, lcp_sum);
- } else {
- // Enough RAM to hold B in RAM.
- std::uint64_t *B = compute_plcp_bitvector_large_ram<text_offset_type, ext_text_offset_type>(text_length,
- ram_use, text_filename, sa_filename, bwt_filename, output_filename, n_irreducible_lcps,
- sum_irreducible_lcps, total_io_volume);
-
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, B, sa_filename,
- output_filename, total_io_volume, max_lcp, lcp_sum);
- }
-}
-
-template<typename text_offset_type, typename ext_text_offset_type>
-void compute_lcp_array(std::string text_filename, std::string sa_filename,
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) {
- srand(time(0) + getpid());
- utils::drop_disk_pages(text_filename);
- utils::drop_disk_pages(sa_filename);
- utils::drop_disk_pages(bwt_filename);
- long double global_start = utils::wclock();
-
- // Initialize basic parameters.
- std::uint64_t text_length = utils::file_size(text_filename);
- std::uint64_t lcp_sum = 0;
- std::uint64_t max_lcp = 0;
- std::uint64_t n_irreducible_lcps = 0;
- std::uint64_t sum_irreducible_lcps = 0;
- std::uint64_t total_io_volume = 0;
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use;
-
- if (text_length == 0) {
- fprintf(stderr, "Error: the input file is empty!\n");
- std::exit(EXIT_FAILURE);
- }
-
- // Turn paths absolute.
- text_filename = utils::absolute_path(text_filename);
- sa_filename = utils::absolute_path(sa_filename);
- bwt_filename = utils::absolute_path(bwt_filename);
- output_filename = utils::absolute_path(output_filename);
-
- // Print summary of basic parameters.
- fprintf(stderr, "Text filename = %s\n", text_filename.c_str());
- fprintf(stderr, "SA filename = %s\n", sa_filename.c_str());
- fprintf(stderr, "BWT filename = %s\n", bwt_filename.c_str());
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str());
- fprintf(stderr, "Text length = %lu (%.2LfMiB)\n", text_length, 1.L * text_length / (1 << 20));
- fprintf(stderr, "Text size / ram_use = %.2Lf\n", text_to_ram_ratio);
- fprintf(stderr, "RAM use = %lu (%.2LfMiB)\n", ram_use, ram_use / (1024.L * 1024));
- fprintf(stderr, "sizeof(text_offset_type) = %lu\n", sizeof(text_offset_type));
- fprintf(stderr, "sizeof(ext_text_offset_type) = %lu\n", sizeof(ext_text_offset_type));
-#ifdef _OPENMP
- fprintf(stderr, "Max number of threads = %d\n", omp_get_max_threads());
-#endif
- fprintf(stderr, "\n");
-
- compute_lcp_array<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename,
- sa_filename, bwt_filename, output_filename, max_lcp, lcp_sum,
- n_irreducible_lcps, sum_irreducible_lcps, total_io_volume);
-
- // Print summary.
- long double total_time = utils::wclock() - global_start;
- long double avg_lcp = (long double)lcp_sum / text_length;
- fprintf(stderr, "\n\nComputation finished. Summary:\n");
- fprintf(stderr, " elapsed time = %.2Lfs (%.3Lfs/MiB of text)\n", total_time, total_time / (1.L * text_length / (1L << 20)));
- fprintf(stderr, " speed = %.2LfMiB of text/s\n", (1.L * text_length / (1L << 20)) / total_time);
- fprintf(stderr, " I/O volume = %lu (%.2Lfbytes/input symbol)\n", total_io_volume, (1.L * total_io_volume) / text_length);
- fprintf(stderr, " number of irreducible LCPs = %lu\n", n_irreducible_lcps);
- fprintf(stderr, " sum of irreducible LCPs = %lu\n", sum_irreducible_lcps);
- fprintf(stderr, " sum of all LCPs = %lu\n", lcp_sum);
- fprintf(stderr, " average LCP = %.2Lf\n", avg_lcp);
- fprintf(stderr, " maximal LCP = %lu\n", max_lcp);
-}
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_ARRAY_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_from_plcp.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_from_plcp.hpp
deleted file mode 100644
index 23e99ae4..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_lcp_from_plcp.hpp
+++ /dev/null
@@ -1,388 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/compute_lcp_from_plcp.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_FROM_PLCP_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_FROM_PLCP_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <string>
-#include <algorithm>
-#include <omp.h>
-
-#include "io/async_stream_reader.hpp"
-#include "io/async_stream_writer.hpp"
-#include "io/async_multi_stream_writer.hpp"
-#include "io/async_multipart_file_writer.hpp"
-#include "io/async_multipart_multifile_reader.hpp"
-#include "utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename text_offset_type>
-void compute_lcp_from_plcp(std::uint64_t text_length, std::uint64_t ram_use,
- std::string sa_filename, std::string output_filename, std::string B_filename,
- std::uint64_t &global_io_volume, std::uint64_t &max_lcp, std::uint64_t &lcp_sum,
- bool keep_plcp = false) {
- fprintf(stderr, "Convert PLCP to LCP:\n");
- long double convert_plcp_to_lcp_start = utils::wclock();
-
- // Initialize basic parameters.
- std::uint64_t max_block_size = ram_use / sizeof(text_offset_type);
- std::uint64_t n_blocks = (text_length + max_block_size - 1) / max_block_size;
- std::uint64_t local_lcp_sum = 0;
- std::uint64_t local_max_lcp = 0;
- std::uint64_t total_io_volume = 0;
-
- // Print info about blocks.
- fprintf(stderr, " Max block size = %lu (%.2LfMiB)\n", max_block_size, (1.L * max_block_size) / (1L << 20));
- fprintf(stderr, " Number of blocks = %lu\n", n_blocks);
-
- // Set the filenames of files storing SA and LCP subsequences.
- std::string *sa_subsequences_filenames = new std::string[n_blocks];
- std::string *lcp_subsequences_filenames = new std::string[n_blocks];
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) {
- sa_subsequences_filenames[block_id] = output_filename + ".sa_subseq." + utils::intToStr(block_id) + "." + utils::random_string_hash();
- lcp_subsequences_filenames[block_id] = output_filename + ".lcp_sebseq." + utils::intToStr(block_id) + "." + utils::random_string_hash();
- }
-
- // Compute SA subsequences.
- {
- fprintf(stderr, " Compute SA subsequences: ");
- long double compute_sa_subseq_start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Initialize streaming of suffix array.
- typedef async_stream_reader<text_offset_type> sa_reader_type;
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename);
-
- // Initialize multifile writer of SA subsequences.
- static const std::uint64_t n_free_buffers = 4;
- std::uint64_t total_buffers_ram = ram_use;
- std::uint64_t buffer_size = std::min((16UL << 20), total_buffers_ram / (n_blocks + n_free_buffers));
- typedef async_multi_stream_writer<text_offset_type> sa_multiwriter_type;
- sa_multiwriter_type *sa_multiwriter = new sa_multiwriter_type(buffer_size, n_free_buffers);
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id)
- sa_multiwriter->add_file(sa_subsequences_filenames[block_id]);
-
- // Read SA / write SA subsequences.
- for (std::uint64_t j = 0; j < text_length; ++j) {
- std::uint64_t sa_j = sa_reader->read();
- std::uint64_t block_id = sa_j / max_block_size;
- sa_multiwriter->write_to_ith_file(block_id, sa_j);
- }
-
- // Update I/O volume.
- io_volume += sa_reader->bytes_read() + sa_multiwriter->bytes_written();
- total_io_volume += io_volume;
-
- // Clean up.
- delete sa_reader;
- delete sa_multiwriter;
-
- // Print summary.
- long double compute_sa_subseq_time = utils::wclock() - compute_sa_subseq_start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", compute_sa_subseq_time,
- ((1.L * io_volume) / (1L << 20)) / compute_sa_subseq_time, (1.L * total_io_volume) / text_length);
- }
-
- // Compute LCP subsequences.
- {
- fprintf(stderr, " Compute LCP subsequences: ");
- long double compute_lcp_subseq_start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Allocate the array holding the block of PLCP.
- text_offset_type *plcp_block = new text_offset_type[max_block_size];
-
- // Initialize reading of PLCP bitvector.
- typedef async_stream_reader<std::uint64_t> plcp_bitvector_reader_type;
- plcp_bitvector_reader_type *plcp_bitvector_reader = new plcp_bitvector_reader_type(B_filename);
- std::uint64_t bitbuf = plcp_bitvector_reader->read();
- std::uint64_t bitpos = 0;
- std::uint64_t cur_plcp = 1;
-
- // Allocate buffer.
- static const std::uint64_t buffer_size = (1UL << 20);
- text_offset_type *buf = new text_offset_type[buffer_size];
- text_offset_type *outbuf = new text_offset_type[buffer_size];
-
- // Process blocks left to right.
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) {
- std::uint64_t block_beg = block_id * max_block_size;
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length);
- std::uint64_t block_size = block_end - block_beg;
-
- // Read a block of PLCP into RAM.
- for (std::uint64_t j = 0; j < block_size; ++j) {
- // Increment cur_plcp for every 0 in the bitvector.
- while ((bitbuf & (1UL << bitpos)) == 0) {
- ++cur_plcp;
- ++bitpos;
- if (bitpos == 64) {
- bitbuf = plcp_bitvector_reader->read();
- bitpos = 0;
- }
- }
-
- // We decrement last because cur_plcp is unsigned.
- --cur_plcp;
- plcp_block[j] = cur_plcp;
-
- // Skip the 1-bit in the bitvector.
- ++bitpos;
- if (bitpos == 64) {
- if (plcp_bitvector_reader->empty() == false)
- bitbuf = plcp_bitvector_reader->read();
- bitpos = 0;
- }
- }
-
- // Compute LCP subsequence and write to file.
- {
- // Initialize SA subsequence reader.
- typedef async_stream_reader<text_offset_type> sa_subseq_reader_type;
- sa_subseq_reader_type *sa_subseq_reader =
- new sa_subseq_reader_type(sa_subsequences_filenames[block_id]);
-
- // Initialize LCP subsequence writer.
- std::uint64_t single_file_max_bytes = text_length / (n_blocks * 2UL); // 10UL
- typedef async_multipart_file_writer<text_offset_type> lcp_subseq_writer_type;
- lcp_subseq_writer_type *lcp_subseq_writer =
- new lcp_subseq_writer_type(lcp_subsequences_filenames[block_id], single_file_max_bytes);
-
- // Compute LCP subsequence.
- std::uint64_t subseq_size = utils::file_size(sa_subsequences_filenames[block_id]) / sizeof(text_offset_type);
- std::uint64_t items_processed = 0;
- while (items_processed < subseq_size) {
- std::uint64_t filled = std::min(buffer_size, subseq_size - items_processed);
- sa_subseq_reader->read(buf, filled);
-#ifdef _OPENMP
- #pragma omp parallel for
- for (std::uint64_t j = 0; j < filled; ++j) {
- std::uint64_t sa_val = buf[j];
- std::uint64_t lcp_val = plcp_block[sa_val - block_beg];
- outbuf[j] = lcp_val;
- }
-#else
- for (std::uint64_t j = 0; j < filled; ++j) {
- std::uint64_t sa_val = buf[j];
- std::uint64_t lcp_val = plcp_block[sa_val - block_beg];
- outbuf[j] = lcp_val;
- }
-#endif
- lcp_subseq_writer->write(outbuf, filled);
- items_processed += filled;
- }
-
- // Update I/O volume.
- io_volume += sa_subseq_reader->bytes_read() + lcp_subseq_writer->bytes_written();
-
- // Clean up.
- delete sa_subseq_reader;
- delete lcp_subseq_writer;
- }
-
- utils::file_delete(sa_subsequences_filenames[block_id]);
- }
-
- // Update I/O volume.
- io_volume += plcp_bitvector_reader->bytes_read();
- total_io_volume += io_volume;
-
- // Clean up.
- delete[] buf;
- delete[] outbuf;
- delete[] plcp_block;
- delete plcp_bitvector_reader;
- if (keep_plcp == false)
- utils::file_delete(B_filename);
-
- // Print summary.
- long double compute_lcp_subseq_time = utils::wclock() - compute_lcp_subseq_start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", compute_lcp_subseq_time,
- ((1.L * io_volume) / (1L << 20)) / compute_lcp_subseq_time, (1.L * total_io_volume) / text_length);
- }
-
- // Merge LCP subsequences.
- {
- fprintf(stderr, " Merge LCP subsequences: ");
- long double merge_lcp_subseq_start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Initialize the reader of LCP subsequences.
- std::uint64_t total_buffers_ram = ram_use;
- std::uint64_t buffer_size = total_buffers_ram / (2UL * n_blocks);
- typedef async_multipart_multifile_reader<text_offset_type> lcp_subseq_multireader_type;
- lcp_subseq_multireader_type *lcp_subseq_multireader =
- new lcp_subseq_multireader_type(n_blocks, buffer_size);
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id)
- lcp_subseq_multireader->add_file(lcp_subsequences_filenames[block_id]);
-
- // Initialize the writer of the final LCP array.
- typedef async_stream_writer<text_offset_type> lcp_writer_type;
- lcp_writer_type *lcp_writer = new lcp_writer_type(output_filename);
-
- // Initialize the reader of SA.
- typedef async_stream_reader<text_offset_type> sa_reader_type;
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename);
-
- // Compute final LCP.
- for (std::uint64_t j = 0; j < text_length; ++j) {
- std::uint64_t sa_j = sa_reader->read();
- std::uint64_t block_id = sa_j / max_block_size;
- std::uint64_t lcp_j = lcp_subseq_multireader->read_from_ith_file(block_id);
- local_max_lcp = std::max(local_max_lcp, lcp_j);
- local_lcp_sum += lcp_j;
- lcp_writer->write(lcp_j);
- }
-
- // Update I/O volume.
- io_volume += sa_reader->bytes_read() + lcp_subseq_multireader->bytes_read() + lcp_writer->bytes_written();
- total_io_volume += io_volume;
-
- // Clean up.
- delete sa_reader;
- delete lcp_writer;
- delete lcp_subseq_multireader;
-
- // Print summary.
- long double merge_lcp_subseq_time = utils::wclock() - merge_lcp_subseq_start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", merge_lcp_subseq_time,
- ((1.L * io_volume) / (1L << 20)) / merge_lcp_subseq_time, (1.L * total_io_volume) / text_length);
- }
-
- // Clean up.
- delete[] sa_subsequences_filenames;
- delete[] lcp_subsequences_filenames;
-
- // Print summary.
- long double convert_plcp_to_lcp_time = utils::wclock() - convert_plcp_to_lcp_start;
- fprintf(stderr, "Summary: time = %.2Lfs, total I/O vol = %.2Lfn\n",
- convert_plcp_to_lcp_time, (1.L * total_io_volume) / text_length);
-
- // Update reference variables.
- global_io_volume += total_io_volume;
- max_lcp = local_max_lcp;
- lcp_sum = local_lcp_sum;
-}
-
-template<typename text_offset_type>
-void compute_lcp_from_plcp(std::uint64_t text_length, std::uint64_t ram_use, std::uint64_t *B,
- std::string sa_filename, std::string output_filename, std::uint64_t &total_io_volume,
- std::uint64_t &max_lcp, std::uint64_t &lcp_sum) {
- // Write B to disk.
- std::string B_filename = output_filename + ".plcp." + utils::random_string_hash();
- {
- // Start the timer.
- fprintf(stderr, "Write PLCP bitvector to disk: ");
- long double write_plcp_start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Write the data.
- std::uint64_t length_of_B_in_words = (2UL * text_length + 63) / 64;
- utils::write_to_file(B, length_of_B_in_words, B_filename);
-
- // Update I/O volume.
- io_volume += length_of_B_in_words * sizeof(std::uint64_t);
- total_io_volume += io_volume;
- long double write_plcp_time = utils::wclock() - write_plcp_start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, I/O vol = %.2Lfn\n\n", write_plcp_time,
- ((1.L * io_volume) / (1L << 20)) / write_plcp_time, (1.L * io_volume) / text_length);
- }
- delete[] B;
-
- // Convert PLCP to LCP using EM method.
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, sa_filename,
- output_filename, B_filename, total_io_volume, max_lcp, lcp_sum);
-}
-
-template<typename text_offset_type>
-void compute_lcp_from_plcp(std::string input_filename, std::string sa_filename,
- std::string output_filename, std::uint64_t ram_use) {
- srand(time(0) + getpid());
- utils::drop_disk_pages(input_filename);
- utils::drop_disk_pages(sa_filename);
- long double start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Compute basic parameters.
- std::uint64_t text_length = utils::file_size(sa_filename) / sizeof(text_offset_type);
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use;
-
- if (text_length == 0) {
- fprintf(stderr, "Error: the input file is empty!\n");
- std::exit(EXIT_FAILURE);
- }
-
- // Turn paths absolute.
- input_filename = utils::absolute_path(input_filename);
- sa_filename = utils::absolute_path(sa_filename);
- output_filename = utils::absolute_path(output_filename);
-
- // Print summary of basic parameters.
- fprintf(stderr, "PLCP filename = %s\n", input_filename.c_str());
- fprintf(stderr, "SA filename = %s\n", sa_filename.c_str());
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str());
- fprintf(stderr, "Text length = %lu (%.2LfMiB)\n", text_length, 1.L * text_length / (1 << 20));
- fprintf(stderr, "Text size / ram_use = %.2Lf\n", text_to_ram_ratio);
- fprintf(stderr, "RAM use = %lu (%.2LfMiB)\n", ram_use, ram_use / (1024.L * 1024));
- fprintf(stderr, "sizeof(text_offset_type) = %lu\n", sizeof(text_offset_type));
-#ifdef _OPENMP
- fprintf(stderr, "Max number of threads = %d\n", omp_get_max_threads());
-#endif
- fprintf(stderr, "\n");
-
- std::uint64_t lcp_sum = 0;
- std::uint64_t max_lcp = 0;
-
- // Convert the PCLP array (bitvector representation) to LCP array.
- compute_lcp_from_plcp<text_offset_type>(text_length, ram_use, sa_filename,
- output_filename, input_filename, io_volume, max_lcp, lcp_sum, true);
-
- // Print summary.
- long double total_time = utils::wclock() - start;
- long double avg_lcp = (long double)lcp_sum / text_length;
- fprintf(stderr, "\n\nComputation finished. Summary:\n");
- fprintf(stderr, " elapsed time = %.2Lfs (%.3Lfs/MiB of text)\n", total_time, total_time / (1.L * text_length / (1L << 20)));
- fprintf(stderr, " speed = %.2LfMiB of text/s\n", (1.L * text_length / (1L << 20)) / total_time);
- fprintf(stderr, " I/O volume = %lu (%.2Lfbytes/input symbol)\n", io_volume, (1.L * io_volume) / text_length);
- fprintf(stderr, " sum of all LCPs = %lu\n", lcp_sum);
- fprintf(stderr, " average LCP = %.2Lf\n", avg_lcp);
- fprintf(stderr, " maximal LCP = %lu\n", max_lcp);
-}
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_LCP_FROM_PLCP_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_plcp_bitvector.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_plcp_bitvector.hpp
deleted file mode 100644
index e9724e39..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/compute_plcp_bitvector.hpp
+++ /dev/null
@@ -1,318 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/compute_plcp_bitvector.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_PLCP_BITVECTOR_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_PLCP_BITVECTOR_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <ctime>
-#include <string>
-#include <algorithm>
-#include <omp.h>
-#include <unistd.h>
-
-#include "utils.hpp"
-#include "distribute_pairs_and_compute_C.hpp"
-#include "process_halfsegment_pairs.hpp"
-#include "compute_B.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-// A version that returns the B bitvector as a file on disk.
-template<typename text_offset_type, typename ext_text_offset_type>
-void compute_plcp_bitvector_small_ram(std::uint64_t text_length, std::uint64_t ram_use,
- std::string text_filename, std::string sa_filename, std::string bwt_filename,
- std::string B_filename, std::uint64_t &n_irreducible_lcps,
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) {
- fprintf(stderr, "Compute PLCP bitvector (dest = EM):\n");
- long double compute_plcp_bitvector_start = utils::wclock();
-
- // Initialize basic parameters.
- static const std::uint64_t max_overflow_size = (1UL << 20);
- std::uint64_t max_halfsegment_size = std::max(1UL, ram_use / 2);
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size;
- std::uint64_t n_different_halfsegment_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2;
- std::uint64_t io_volume = 0;
- std::uint64_t max_block_size_B = std::max(64UL, (((ram_use * 8UL) >> 6) << 6));
- std::uint64_t n_blocks_B = (2UL * text_length + max_block_size_B - 1) / max_block_size_B;
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use;
-
- // Print info about halfsegments.
- fprintf(stderr, " Max halfsegment size = %lu (%.2LfMiB)\n", max_halfsegment_size, (1.L * max_halfsegment_size / (1UL << 20)));
- fprintf(stderr, " Number of halfsegments = %lu\n", n_halfsegments);
- fprintf(stderr, " Number of halfsegment pairs = %lu\n", n_different_halfsegment_pairs);
-
- // Initialize file names with halfsegment pairs.
- std::string **pairs_filenames = new std::string*[n_halfsegments];
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) {
- pairs_filenames[i] = new std::string[n_halfsegments];
- for (std::uint64_t j = i; j < n_halfsegments; ++j) {
- std::string filename = B_filename + ".pairs." + utils::intToStr(i) + "_" + utils::intToStr(j);
- pairs_filenames[i][j] = filename;
- }
- }
-
- // Distribute pairs (i, Phi[i]) such that PLCP[i] is irreducible
- // into files corresponding to different halfsegment pairs and
- // compute the C bitvector.
- std::string C_filename = B_filename + ".irreducible_positions_bv";
- std::uint64_t phi_undefined_position = 0;
- if (text_to_ram_ratio > 8.0L) {
- // Distribute pairs.
- phi_undefined_position = distribute_pairs<text_offset_type>(text_length, max_halfsegment_size,
- ram_use, sa_filename, bwt_filename, pairs_filenames, n_irreducible_lcps, io_volume);
-
- // Compute C.
- compute_C<text_offset_type>(text_length, max_halfsegment_size, ram_use, phi_undefined_position,
- pairs_filenames, sa_filename, bwt_filename, C_filename, io_volume);
- } else {
- // Distribute pairs and compute C.
- phi_undefined_position = distribute_pairs_and_compute_C<text_offset_type>(text_length,
- max_halfsegment_size, ram_use, sa_filename, bwt_filename, C_filename,
- pairs_filenames, n_irreducible_lcps, io_volume);
- }
-
- std::string *irreducible_bits_filenames = new std::string[n_blocks_B];
- for (std::uint64_t block_id = 0; block_id < n_blocks_B; ++block_id) {
- std::string filename = B_filename + ".irreducible_bits_bv." + utils::intToStr(block_id);
- irreducible_bits_filenames[block_id] = filename;
- }
-
- // Process all pairs of halfsegments.
- sum_irreducible_lcps = process_halfsegment_pairs<text_offset_type, ext_text_offset_type>(text_filename,
- text_length, max_block_size_B, max_halfsegment_size, max_overflow_size,
- pairs_filenames, irreducible_bits_filenames, io_volume);
-
- // Clean up.
- for (std::uint64_t halfseg_id = 0; halfseg_id < n_halfsegments; ++halfseg_id)
- delete[] pairs_filenames[halfseg_id];
- delete[] pairs_filenames;
-
- // Compute B.
- compute_B<ext_text_offset_type>(text_length, max_block_size_B, phi_undefined_position,
- B_filename, C_filename, irreducible_bits_filenames, io_volume);
-
- // Update I/O volume.
- total_io_volume += io_volume;
-
- // Clean up.
- delete[] irreducible_bits_filenames;
-
- // Print summary.
- long double compute_plcp_bitvector_time = utils::wclock() - compute_plcp_bitvector_start;
- fprintf(stderr, "Summary: time = %.2Lfs, total I/O vol = %.2Lfn\n\n",
- compute_plcp_bitvector_time, (1.L * io_volume) / text_length);
-}
-
-// A version, that returns a pointer to B bitvector. Requires at least 2n bits of RAM.
-template<typename text_offset_type, typename ext_text_offset_type>
-std::uint64_t* compute_plcp_bitvector_large_ram(std::uint64_t text_length, std::uint64_t ram_use,
- std::string text_filename, std::string sa_filename, std::string bwt_filename,
- std::string output_filename, std::uint64_t &n_irreducible_lcps,
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) {
- fprintf(stderr, "Compute PLCP bitvector (dest = RAM):\n");
- long double compute_plcp_bitvector_start = utils::wclock();
-
- // Initialize basic parameters.
- long double ram_to_text_ratio = (long double)ram_use / (long double)text_length;
- std::uint64_t *B = NULL;
- std::uint64_t io_volume = 0;
-
- if (ram_to_text_ratio < 1.375L) {
- // Initialize basic parameters.
- static const std::uint64_t max_overflow_size = (1UL << 20);
- std::uint64_t max_halfsegment_size = std::max(1UL, ram_use / 2);
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size;
- std::uint64_t n_different_halfsegment_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2;
-
- // Print info about halfsegments.
- fprintf(stderr, " Max halfsegment size = %lu (%.2LfMiB)\n", max_halfsegment_size, (1.L * max_halfsegment_size / (1UL << 20)));
- fprintf(stderr, " Number of halfsegments = %lu\n", n_halfsegments);
- fprintf(stderr, " Number of halfsegment pairs = %lu\n", n_different_halfsegment_pairs);
-
- // Initialize file names with halfsegment pairs.
- std::string **pairs_filenames = new std::string*[n_halfsegments];
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) {
- pairs_filenames[i] = new std::string[n_halfsegments];
- for (std::uint64_t j = i; j < n_halfsegments; ++j) {
- std::string filename = output_filename + ".pairs." + utils::intToStr(i) + "_" + utils::intToStr(j);
- pairs_filenames[i][j] = filename;
- }
- }
-
- // Distribute pairs (i, Phi[i]) such that PLCP[i] is irreducible
- // into files corresponding to different halfsegment pairs and
- // compute the C bitvector.
- std::string C_filename = output_filename + ".irreducible_positions_bv";
- std::uint64_t phi_undefined_position = distribute_pairs_and_compute_C<text_offset_type>(text_length,
- max_halfsegment_size, ram_use, sa_filename, bwt_filename, C_filename, pairs_filenames,
- n_irreducible_lcps, io_volume);
-
- // Process all pairs of halfsegments.
- std::string irreducible_bits_filename = output_filename + ".irreducible_bits";
- sum_irreducible_lcps = process_halfsegment_pairs<text_offset_type, ext_text_offset_type>(text_filename,
- text_length, max_halfsegment_size, max_overflow_size, pairs_filenames,
- irreducible_bits_filename, io_volume);
-
- // Clean up.
- for (std::uint64_t halfseg_id = 0; halfseg_id < n_halfsegments; ++halfseg_id)
- delete[] pairs_filenames[halfseg_id];
- delete[] pairs_filenames;
-
- // Allocate B.
- std::uint64_t B_size_in_words = (2UL * text_length + 63) / 64;
- B = new std::uint64_t[B_size_in_words];
- std::fill(B, B + B_size_in_words, 0UL);
-
- // Compute B.
- compute_B<ext_text_offset_type>(text_length, B, irreducible_bits_filename,
- C_filename, phi_undefined_position, io_volume);
- } else {
- // Compute B.
- B = compute_B<text_offset_type>(text_length, text_filename, sa_filename,
- n_irreducible_lcps, sum_irreducible_lcps, io_volume);
- }
-
- // Update I/O volume.
- total_io_volume += io_volume;
-
- // Print summary.
- long double compute_plcp_bitvector_time = utils::wclock() - compute_plcp_bitvector_start;
- fprintf(stderr, "Summary: time = %.2Lfs, total I/O vol = %.2Lfn\n\n",
- compute_plcp_bitvector_time, (1.L * io_volume) / text_length);
-
- // Return pointer to B.
- return B;
-}
-
-template<typename text_offset_type, typename ext_text_offset_type>
-void compute_plcp_bitvector(std::uint64_t text_length, std::uint64_t ram_use,
- std::string text_filename, std::string sa_filename, std::string bwt_filename,
- std::string output_filename, std::uint64_t &n_irreducible_lcps,
- std::uint64_t &sum_irreducible_lcps, std::uint64_t &total_io_volume) {
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use;
- if (text_to_ram_ratio > 4.0L) {
- // Not enough RAM to hold B in RAM.
- compute_plcp_bitvector_small_ram<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename,
- sa_filename, bwt_filename, output_filename, n_irreducible_lcps, sum_irreducible_lcps, total_io_volume);
- } else {
- // Enough RAM to hold B in RAM.
- std::uint64_t *B = compute_plcp_bitvector_large_ram<text_offset_type, ext_text_offset_type>(text_length,
- ram_use, text_filename, sa_filename, bwt_filename, output_filename, n_irreducible_lcps,
- sum_irreducible_lcps, total_io_volume);
-
- // Write B to disk.
- {
- // Start the timer.
- fprintf(stderr, "Write PLCP bitvector to disk: ");
- long double write_plcp_start = utils::wclock();
- std::uint64_t io_volume = 0;
-
- // Write the data.
- std::uint64_t length_of_B_in_words = (2UL * text_length + 63) / 64;
- utils::write_to_file(B, length_of_B_in_words, output_filename);
-
- // Update I/O volume.
- io_volume += length_of_B_in_words * sizeof(std::uint64_t);
- total_io_volume += io_volume;
-
- // Print summary.
- long double write_plcp_time = utils::wclock() - write_plcp_start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, I/O vol = %.2Lfn\n\n", write_plcp_time,
- ((1.L * io_volume) / (1L << 20)) / write_plcp_time, (1.L * io_volume) / text_length);
- }
- delete[] B;
- }
-}
-
-template<typename text_offset_type, typename ext_text_offset_type>
-void compute_plcp_bitvector(std::string text_filename, std::string sa_filename,
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) {
- utils::drop_disk_pages(text_filename);
- utils::drop_disk_pages(sa_filename);
- utils::drop_disk_pages(bwt_filename);
- srand(time(0) + getpid());
- long double global_start = utils::wclock();
- std::uint64_t total_io_volume = 0;
-
- // Compute basic parameters.
- std::uint64_t text_length = utils::file_size(text_filename);
- std::uint64_t n_irreducible_lcps = 0;
- std::uint64_t sum_irreducible_lcps = 0;
- long double text_to_ram_ratio = (long double)text_length / (long double)ram_use;
-
- if (text_length == 0) {
- fprintf(stderr, "Error: the input file is empty!\n");
- std::exit(EXIT_FAILURE);
- }
-
- // Turn paths absolute.
- text_filename = utils::absolute_path(text_filename);
- sa_filename = utils::absolute_path(sa_filename);
- bwt_filename = utils::absolute_path(bwt_filename);
- output_filename = utils::absolute_path(output_filename);
-
- // Print summary of basic parameters.
- fprintf(stderr, "Text filename = %s\n", text_filename.c_str());
- fprintf(stderr, "SA filename = %s\n", sa_filename.c_str());
- fprintf(stderr, "BWT filename = %s\n", bwt_filename.c_str());
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str());
- fprintf(stderr, "Text length = %lu (%.2LfMiB)\n", text_length, 1.L * text_length / (1 << 20));
- fprintf(stderr, "Text size / ram_use = %.2Lf\n", text_to_ram_ratio);
- fprintf(stderr, "RAM use = %lu (%.2LfMiB)\n", ram_use, ram_use / (1024.L * 1024));
- fprintf(stderr, "sizeof(text_offset_type) = %lu\n", sizeof(text_offset_type));
- fprintf(stderr, "sizeof(ext_text_offset_type) = %lu\n", sizeof(ext_text_offset_type));
-#ifdef _OPENMP
- fprintf(stderr, "Max number of threads = %d\n", omp_get_max_threads());
-#endif
- fprintf(stderr, "\n");
-
- compute_plcp_bitvector<text_offset_type, ext_text_offset_type>(text_length, ram_use, text_filename,
- sa_filename, bwt_filename, output_filename, n_irreducible_lcps,
- sum_irreducible_lcps, total_io_volume);
-
- // Print summary.
- long double total_time = utils::wclock() - global_start;
- fprintf(stderr, "\n\nComputation finished. Summary:\n");
- fprintf(stderr, " elapsed time = %.2Lfs (%.3Lfs/MiB of text)\n", total_time, total_time / (1.L * text_length / (1L << 20)));
- fprintf(stderr, " speed = %.2LfMiB of text/s\n", (1.L * text_length / (1L << 20)) / total_time);
- fprintf(stderr, " I/O volume = %lu (%.2Lfbytes/input symbol)\n", total_io_volume, (1.L * total_io_volume) / text_length);
- fprintf(stderr, " number of irreducible LCPs = %lu\n", n_irreducible_lcps);
- fprintf(stderr, " sum of irreducible LCPs = %lu\n", sum_irreducible_lcps);
-}
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_COMPUTE_PLCP_BITVECTOR_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp
deleted file mode 100644
index f872cb05..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp
+++ /dev/null
@@ -1,523 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/distribute_pairs_and_compute_C.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_DISTRIBUTE_PAIRS_AND_COMPUTE_C_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_DISTRIBUTE_PAIRS_AND_COMPUTE_C_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <string>
-#include <algorithm>
-#include <omp.h>
-
-#include "io/async_stream_reader.hpp"
-#include "io/async_multi_stream_writer.hpp"
-#include "set_bits.hpp"
-#include "utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename text_offset_type>
-std::uint64_t distribute_pairs(std::uint64_t text_length, std::uint64_t max_halfsegment_size,
- std::uint64_t ram_use, std::string sa_filename, std::string bwt_filename, std::string **pairs_filenames,
- std::uint64_t &n_irreducible_lcps, std::uint64_t &total_io_volume) {
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size;
- std::uint64_t n_irreducible = 0;
- std::uint64_t phi_undefined_position = 0;
-
- fprintf(stderr, " Distribute irreducible (i, Phi[i]) pairs: ");
- long double start = utils::wclock();
-
- // Create a map from used halfsegment pairs to a contiguous
- // range of integers. This is needed to use multifile writer.
- std::uint64_t **halfseg_ids_to_file_id = new std::uint64_t*[n_halfsegments];
- {
- for (std::uint64_t i = 0; i < n_halfsegments; ++i)
- halfseg_ids_to_file_id[i] = new std::uint64_t[n_halfsegments];
-
- std::uint64_t file_counter = 0;
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) {
- for (std::uint64_t j = i; j < n_halfsegments; ++j) {
- halfseg_ids_to_file_id[i][j] = file_counter;
- halfseg_ids_to_file_id[j][i] = file_counter;
- ++file_counter;
- }
- }
- }
-
- // Initialize multifile writer of (i, Phi[i]) pairs.
- static const std::uint64_t n_free_buffers = 4;
- std::uint64_t halfseg_buffers_ram = ram_use;
- std::uint64_t n_different_halfseg_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2;
- std::uint64_t buffer_size = std::max(1UL, halfseg_buffers_ram / (n_different_halfseg_pairs + n_free_buffers));
- typedef async_multi_stream_writer<text_offset_type> pair_multiwriter_type;
- pair_multiwriter_type *pair_multiwriter = new pair_multiwriter_type(buffer_size, n_free_buffers);
- for (std::uint64_t i = 0; i < n_halfsegments; ++i)
- for (std::uint64_t j = i; j < n_halfsegments; ++j)
- pair_multiwriter->add_file(pairs_filenames[i][j]);
-
- // Initialize suffix array reader.
- typedef async_stream_reader<text_offset_type> sa_reader_type;
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename);
-
- // Initialize BWT reader.
- typedef async_stream_reader<std::uint8_t> bwt_reader_type;
- bwt_reader_type *bwt_reader = new bwt_reader_type(bwt_filename);
-
- // Distribution follows.
- std::uint8_t prev_bwt = 0;
- std::uint64_t prev_sa = 0;
- std::uint64_t prev_halfseg_id = 0;
- for (std::uint64_t i = 0; i < text_length; ++i) {
- std::uint64_t cur_sa = sa_reader->read();
- std::uint64_t cur_halfseg_id = cur_sa / max_halfsegment_size;
- std::uint8_t cur_bwt = bwt_reader->read();
-
- if (i == 0 || cur_sa == 0 || prev_sa == 0 || cur_bwt != prev_bwt) {
- // PLCP[cur_sa] is irreducible. Write (i, Phi[i]) to appropriate file.
- ++n_irreducible;
- if (i > 0) {
- std::uint64_t file_id = halfseg_ids_to_file_id[cur_halfseg_id][prev_halfseg_id];
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)cur_sa);
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)prev_sa);
- } else phi_undefined_position = cur_sa;
- }
-
- prev_halfseg_id = cur_halfseg_id;
- prev_sa = cur_sa;
- prev_bwt = cur_bwt;
- }
-
- // Print summary.
- long double elapsed = utils::wclock() - start;
- std::uint64_t io_volume = sa_reader->bytes_read() + bwt_reader->bytes_read() + pair_multiwriter->bytes_written();
- total_io_volume += io_volume;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n",
- elapsed, ((1.L * io_volume) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length);
-
- // Clean up.
- delete bwt_reader;
- delete sa_reader;
- delete pair_multiwriter;
- for (std::uint64_t i = 0; i < n_halfsegments; ++i)
- delete[] halfseg_ids_to_file_id[i];
- delete[] halfseg_ids_to_file_id;
-
- // Return undefined Phi position.
- n_irreducible_lcps = n_irreducible;
- return phi_undefined_position;
-}
-
-template<typename text_offset_type>
-void compute_C(std::uint64_t text_length, std::uint64_t max_halfsegment_size, std::uint64_t ram_use,
- std::uint64_t phi_undefined_position, std::string **pairs_filenames, std::string sa_filename,
- std::string bwt_filename, std::string C_filename, std::uint64_t &total_io_volume) {
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size;
- std::uint64_t max_block_size = 8UL * ram_use;
- while (max_block_size & 63UL)
- ++max_block_size;
-
- std::uint64_t n_blocks = (text_length + max_block_size - 1) / max_block_size;
- std::uint64_t io_vol_scan_sa = (1 + sizeof(text_offset_type)) * text_length * n_blocks;
-
- std::uint64_t io_vol_scan_pairs = 0;
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) {
- std::uint64_t block_beg = block_id * max_block_size;
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length);
- for (std::uint64_t left_halfseg_id = 0; left_halfseg_id < n_halfsegments; ++left_halfseg_id) {
- std::uint64_t left_halfseg_beg = left_halfseg_id * max_halfsegment_size;
- std::uint64_t left_halfseg_end = std::min(left_halfseg_beg + max_halfsegment_size, text_length);
- for (std::uint64_t right_halfseg_id = left_halfseg_id; right_halfseg_id < n_halfsegments; ++right_halfseg_id) {
- std::uint64_t right_halfseg_beg = right_halfseg_id * max_halfsegment_size;
- std::uint64_t right_halfseg_end = std::min(right_halfseg_beg + max_halfsegment_size, text_length);
- if ((left_halfseg_end > block_beg && block_end > left_halfseg_beg) ||
- (right_halfseg_end > block_beg && block_end > right_halfseg_beg))
- io_vol_scan_pairs += utils::file_size(pairs_filenames[left_halfseg_id][right_halfseg_id]);
- }
- }
- }
-
- if (io_vol_scan_sa <= io_vol_scan_pairs) {
- fprintf(stderr, " Compute bitvector C (method I): ");
- long double start = utils::wclock();
- std::uint64_t io_vol = 0;
-
- // Allocate the array holding the block of C.
- std::uint64_t max_block_size_in_words = max_block_size / 64;
- std::uint64_t *C = new std::uint64_t[max_block_size_in_words];
- std::FILE *f = utils::file_open(C_filename, "w");
-
- // Initialize the buffer.
- static const std::uint64_t buffer_size = (1UL << 20);
- std::uint64_t *buf = new std::uint64_t[buffer_size];
-#ifdef _OPENMP
- std::uint64_t *tempbuf = new std::uint64_t[buffer_size];
-#endif
-
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) {
- std::uint64_t block_beg = block_id * max_block_size;
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length);
- std::uint64_t block_size = block_end - block_beg;
- std::uint64_t block_size_in_words = (block_size + 63) / 64;
-
- // Zero-initialize the block of C.
- std::fill(C, C + block_size_in_words, 0UL);
-
- // Initialize suffix array reader.
- typedef async_stream_reader<text_offset_type> sa_reader_type;
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename);
-
- // Initialize BWT reader.
- typedef async_stream_reader<std::uint8_t> bwt_reader_type;
- bwt_reader_type *bwt_reader = new bwt_reader_type(bwt_filename);
-
- // Scan SA and BWT left to right.
- std::uint64_t filled = 0;
- std::uint8_t prev_bwt = 0;
- std::uint64_t prev_sa = 0;
- for (std::uint64_t i = 0; i < text_length; ++i) {
- std::uint64_t cur_sa = sa_reader->read();
- std::uint8_t cur_bwt = bwt_reader->read();
-
- if (block_beg <= cur_sa && cur_sa < block_end &&
- (i == 0 || cur_sa == 0 || prev_sa == 0 || cur_bwt != prev_bwt)) {
- // PLCP[cur_sa] is irreducible.
- std::uint64_t offset = cur_sa - block_beg;
- buf[filled++] = offset;
- if (filled == buffer_size) {
-#ifdef _OPENMP
- set_bits(C, block_size, buf, filled, tempbuf);
-#else
- set_bits(C, buf, filled);
-#endif
- filled = 0;
- }
- }
-
- prev_sa = cur_sa;
- prev_bwt = cur_bwt;
- }
-
- // Flush the remaining items in the buffer.
- if (filled > 0) {
-#ifdef _OPENMP
- set_bits(C, block_size, buf, filled, tempbuf);
-#else
- set_bits(C, buf, filled);
-#endif
- filled = 0;
- }
-
- // Write current block of C to file.
- utils::write_to_file(C, block_size_in_words, f);
-
- // Update I/O volume.
- io_vol += sa_reader->bytes_read() + bwt_reader->bytes_read() + block_size_in_words * sizeof(std::uint64_t);
-
- // Clean up.
- delete sa_reader;
- delete bwt_reader;
- }
-
- // Clean up.
-#ifdef _OPENMP
- delete[] tempbuf;
-#endif
- delete[] buf;
- delete[] C;
- std::fclose(f);
-
- // Update I/O volume.
- total_io_volume += io_vol;
-
- // Print summary.
- long double elapsed = utils::wclock() - start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", elapsed,
- ((1.L * io_vol) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length);
- } else {
- fprintf(stderr, " Compute bitvector C (method II): ");
- long double start = utils::wclock();
- std::uint64_t io_vol = 0;
-
- // Allocate the array holding the block of C.
- std::uint64_t max_block_size_in_words = max_block_size / 64;
- std::uint64_t *C = new std::uint64_t[max_block_size_in_words];
- std::FILE *f = utils::file_open(C_filename, "w");
-
- // Initialize the buffer.
- static const std::uint64_t buffer_size = (1UL << 20);
- std::uint64_t *buf = new std::uint64_t[buffer_size];
-#ifdef _OPENMP
- std::uint64_t *tempbuf = new std::uint64_t[buffer_size];
-#endif
-
- // Process blocks of C left to right.
- for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) {
- std::uint64_t block_beg = block_id * max_block_size;
- std::uint64_t block_end = std::min(block_beg + max_block_size, text_length);
- std::uint64_t block_size = block_end - block_beg;
- std::uint64_t block_size_in_words = (block_size + 63) / 64;
-
- // Zero-initialize the block of C.
- std::fill(C, C + block_size_in_words, 0UL);
-
- // Iterate through all pairs of halfsegments.
- std::uint64_t filled = 0;
- for (std::uint64_t left_halfseg_id = 0; left_halfseg_id < n_halfsegments; ++left_halfseg_id) {
- std::uint64_t left_halfseg_beg = left_halfseg_id * max_halfsegment_size;
- std::uint64_t left_halfseg_end = std::min(left_halfseg_beg + max_halfsegment_size, text_length);
-
- for (std::uint64_t right_halfseg_id = left_halfseg_id; right_halfseg_id < n_halfsegments; ++right_halfseg_id) {
- std::uint64_t right_halfseg_beg = right_halfseg_id * max_halfsegment_size;
- std::uint64_t right_halfseg_end = std::min(right_halfseg_beg + max_halfsegment_size, text_length);
-
- if ((left_halfseg_end > block_beg && block_end > left_halfseg_beg) ||
- (right_halfseg_end > block_beg && block_end > right_halfseg_beg)) {
- // Initialize reading of pairs.
- typedef async_stream_reader<text_offset_type> pair_reader_type;
- pair_reader_type *pair_reader = new pair_reader_type(pairs_filenames[left_halfseg_id][right_halfseg_id]);
-
- while (pair_reader->empty() == false) {
- std::uint64_t i = pair_reader->read();
- pair_reader->read(); // Skip Phi[i].
-
- if (block_beg <= i && i < block_end) {
- std::uint64_t offset = i - block_beg;
- buf[filled++] = offset;
- if (filled == buffer_size) {
-#ifdef _OPENMP
- set_bits(C, block_size, buf, filled, tempbuf);
-#else
- set_bits(C, buf, filled);
-#endif
- filled = 0;
- }
- }
- }
-
- // Update I/O volume.
- io_vol += pair_reader->bytes_read();
-
- // Clean up.
- delete pair_reader;
- }
- }
- }
-
- // Flush the remaining items in the buffer.
- if (filled > 0) {
-#ifdef _OPENMP
- set_bits(C, block_size, buf, filled, tempbuf);
-#else
- set_bits(C, buf, filled);
-#endif
- filled = 0;
- }
-
- // Special case.
- if (block_beg <= phi_undefined_position && phi_undefined_position < block_end) {
- std::uint64_t offset = phi_undefined_position - block_beg;
- C[offset >> 6] |= (1UL << (offset & 63));
- }
-
- // Write current block of C to file.
- utils::write_to_file(C, block_size_in_words, f);
-
- // Update I/O volume.
- io_vol += block_size_in_words * sizeof(std::uint64_t);
- }
-
- // Clean up.
-#ifdef _OPENMP
- delete[] tempbuf;
-#endif
- delete[] buf;
- delete[] C;
- std::fclose(f);
-
-
- // Update I/O volume.
- total_io_volume += io_vol;
-
- // Print summary.
- long double elapsed = utils::wclock() - start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n", elapsed,
- ((1.L * io_vol) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length);
- }
-}
-
-template<typename text_offset_type>
-std::uint64_t distribute_pairs_and_compute_C(std::uint64_t text_length,
- std::uint64_t max_halfsegment_size, std::uint64_t ram_use, std::string sa_filename,
- std::string bwt_filename, std::string C_filename, std::string **pairs_filenames,
- std::uint64_t &n_irreducible_lcps, std::uint64_t &total_io_volume) {
- fprintf(stderr, " Distribute irreducible (i, Phi[i]) pairs and compute bitvector C: ");
- long double start = utils::wclock();
-
- // Initialize basic parameters.
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size;
- std::uint64_t n_different_halfseg_pairs = (n_halfsegments * (n_halfsegments + 1)) / 2;
- std::uint64_t io_volume = 0;
- std::uint64_t n_irreducible = 0;
- std::uint64_t phi_undefined_position = 0;
-
- // Allocate bitvector C.
- std::uint64_t C_size_in_words = (text_length + 63) / 64;
- std::uint64_t C_size_in_bytes = (text_length + 7) / 8;
- std::uint64_t *C = new std::uint64_t[C_size_in_words];
- std::fill(C, C + C_size_in_words, 0UL);
-
- // Create a map from used halfsegment pairs to a contiguous
- // range of integers. This is needed to use multifile writer.
- std::uint64_t **halfseg_ids_to_file_id = new std::uint64_t*[n_halfsegments];
- {
- for (std::uint64_t i = 0; i < n_halfsegments; ++i)
- halfseg_ids_to_file_id[i] = new std::uint64_t[n_halfsegments];
-
- std::uint64_t file_counter = 0;
- for (std::uint64_t i = 0; i < n_halfsegments; ++i) {
- for (std::uint64_t j = i; j < n_halfsegments; ++j) {
- halfseg_ids_to_file_id[i][j] = file_counter;
- halfseg_ids_to_file_id[j][i] = file_counter;
- ++file_counter;
- }
- }
- }
-
- // Initialize multifile writer of (i, Phi[i]) pairs.
- static const std::uint64_t n_free_buffers = 4;
- std::uint64_t halfseg_buffers_ram = ram_use - C_size_in_bytes;
- std::uint64_t buffer_size = std::max((1UL << 20), halfseg_buffers_ram / (n_different_halfseg_pairs + n_free_buffers));
- typedef async_multi_stream_writer<text_offset_type> pair_multiwriter_type;
- pair_multiwriter_type *pair_multiwriter = new pair_multiwriter_type(buffer_size, n_free_buffers);
- for (std::uint64_t i = 0; i < n_halfsegments; ++i)
- for (std::uint64_t j = i; j < n_halfsegments; ++j)
- pair_multiwriter->add_file(pairs_filenames[i][j]);
-
- // Initialize suffix array reader.
- typedef async_stream_reader<text_offset_type> sa_reader_type;
- sa_reader_type *sa_reader = new sa_reader_type(sa_filename);
-
- // Initialize BWT reader.
- typedef async_stream_reader<std::uint8_t> bwt_reader_type;
- bwt_reader_type *bwt_reader = new bwt_reader_type(bwt_filename);
-
- // Initialize the buffer.
- static const std::uint64_t local_buffer_size = (1UL << 20);
- std::uint64_t *buf = new std::uint64_t[local_buffer_size];
-#ifdef _OPENMP
- std::uint64_t *tempbuf = new std::uint64_t[local_buffer_size];
-#endif
-
- // Distribution follows.
- std::uint64_t filled = 0;
- std::uint8_t prev_bwt = 0;
- std::uint64_t prev_sa = 0;
- std::uint64_t prev_halfseg_id = 0;
- for (std::uint64_t i = 0; i < text_length; ++i) {
- std::uint64_t cur_sa = sa_reader->read();
- std::uint64_t cur_halfseg_id = cur_sa / max_halfsegment_size;
- std::uint8_t cur_bwt = bwt_reader->read();
-
- if (i == 0 || cur_sa == 0 || prev_sa == 0 || cur_bwt != prev_bwt) {
- // PLCP[cur_sa] is irreducible. Write (i, Phi[i]) to appropriate file.
- ++n_irreducible;
- buf[filled++] = cur_sa;
- if (filled == local_buffer_size) {
-#ifdef _OPENMP
- set_bits(C, text_length, buf, filled, tempbuf);
-#else
- set_bits(C, buf, filled);
-#endif
- filled = 0;
- }
-
- if (i > 0) {
- std::uint64_t file_id = halfseg_ids_to_file_id[cur_halfseg_id][prev_halfseg_id];
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)cur_sa);
- pair_multiwriter->write_to_ith_file(file_id, (text_offset_type)prev_sa);
- } else phi_undefined_position = cur_sa;
- }
-
- prev_halfseg_id = cur_halfseg_id;
- prev_sa = cur_sa;
- prev_bwt = cur_bwt;
- }
-
- // Flush the remaining items in the buffer.
- if (filled > 0) {
-#ifdef _OPENMP
- set_bits(C, text_length, buf, filled, tempbuf);
-#else
- set_bits(C, buf, filled);
-#endif
- filled = 0;
- }
-
- // Write C to disk.
- utils::write_to_file(C, C_size_in_words, C_filename);
-
- // Update I/O volume.
- io_volume += sa_reader->bytes_read() + bwt_reader->bytes_read() +
- pair_multiwriter->bytes_written() + C_size_in_words * sizeof(std::uint64_t);
- total_io_volume += io_volume;
-
- // Clean up.
-#ifdef _OPENMP
- delete[] tempbuf;
-#endif
- delete[] buf;
- delete[] C;
- delete bwt_reader;
- delete sa_reader;
- delete pair_multiwriter;
- for (std::uint64_t i = 0; i < n_halfsegments; ++i)
- delete[] halfseg_ids_to_file_id[i];
- delete[] halfseg_ids_to_file_id;
-
- // Print summary.
- long double elapsed = utils::wclock() - start;
- fprintf(stderr, "time = %.2Lfs, I/O = %.2LfMiB/s, total I/O vol = %.2Lfn\n",
- elapsed, ((1.L * io_volume) / (1L << 20)) / elapsed, (1.L * total_io_volume) / text_length);
-
- // Update reference variables.
- n_irreducible_lcps = n_irreducible;
-
- // Return undefined Phi position.
- return phi_undefined_position;
-}
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_DISTRIBUTE_PAIRS_AND_COMPUTE_C_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multi_stream_writer.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multi_stream_writer.hpp
deleted file mode 100644
index 91981e83..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multi_stream_writer.hpp
+++ /dev/null
@@ -1,291 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/io/async_multi_stream_writer.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <vector>
-#include <queue>
-#include <string>
-#include <algorithm>
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-
-#include "../utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename value_type>
-class async_multi_stream_writer {
- private:
- template<typename T>
- struct buffer {
- buffer(std::uint64_t size) {
- m_size = size;
- m_content = (T *)malloc(m_size * sizeof(T));
- m_filled = 0;
- }
-
- void write_to_file(std::FILE *f) {
- utils::write_to_file(m_content, m_filled, f);
- m_filled = 0;
- }
-
- ~buffer() {
- free(m_content);
- }
-
- inline bool empty() const { return m_filled == 0; }
- inline bool full() const { return m_filled == m_size; }
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; }
-
- T *m_content;
- std::uint64_t m_size;
- std::uint64_t m_filled;
- };
-
- template<typename buffer_type>
- struct request {
- request(buffer_type *buffer, std::uint64_t file_id) {
- m_buffer = buffer;
- m_file_id = file_id;
- }
-
- buffer_type *m_buffer;
- std::uint64_t m_file_id;
- };
-
- template<typename request_type>
- struct request_queue {
- request_queue()
- : m_no_more_requests(false) {}
-
- request_type get() {
- request_type ret = m_requests.front();
- m_requests.pop();
- return ret;
- }
-
- inline void add(request_type request) {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_requests.push(request);
- }
-
- inline bool empty() const { return m_requests.empty(); }
-
- std::queue<request_type> m_requests; // Must have FIFO property
- std::condition_variable m_cv;
- std::mutex m_mutex;
- bool m_no_more_requests;
- };
-
- template<typename buffer_type>
- struct buffer_collection {
- // Separate method to allow locking.
- inline void add(buffer_type *buffer) {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_buffers.push_back(buffer);
- }
-
- buffer_type* get() {
- buffer_type *ret = m_buffers.back();
- m_buffers.pop_back();
- return ret;
- }
-
- inline bool empty() const { return m_buffers.empty(); }
-
- std::vector<buffer_type*> m_buffers;
- std::condition_variable m_cv;
- std::mutex m_mutex;
- };
-
- private:
- template<typename T>
- static void async_io_thread_code(async_multi_stream_writer<T> *caller) {
- typedef buffer<T> buffer_type;
- typedef request<buffer_type> request_type;
- while (true) {
- // Wait for request or until 'no more requests' flag is set.
- std::unique_lock<std::mutex> lk(caller->m_write_requests.m_mutex);
- while (caller->m_write_requests.empty() &&
- !(caller->m_write_requests.m_no_more_requests))
- caller->m_write_requests.m_cv.wait(lk);
-
- if (caller->m_write_requests.empty() &&
- caller->m_write_requests.m_no_more_requests) {
- // No more requests -- exit.
- lk.unlock();
- break;
- }
-
- // Extract the buffer from the collection.
- request_type request = caller->m_write_requests.get();
- lk.unlock();
-
- // Write the data to disk.
- request.m_buffer->write_to_file(caller->m_files[request.m_file_id]);
-
- // Add the (now empty) buffer to the collection
- // of empty buffers and notify the waiting thread.
- caller->m_free_buffers.add(request.m_buffer);
- caller->m_free_buffers.m_cv.notify_one();
- }
- }
-
- private:
- typedef buffer<value_type> buffer_type;
- typedef request<buffer_type> request_type;
-
- std::uint64_t m_bytes_written;
- std::uint64_t m_items_per_buf;
-
- std::vector<std::FILE*> m_files;
- std::vector<buffer_type*> m_buffers;
- buffer_collection<buffer_type> m_free_buffers;
- request_queue<request_type> m_write_requests;
- std::thread *m_io_thread;
-
- // Issue a request to write to buffer.
- void issue_write_request(std::uint64_t file_id) {
- request_type req(m_buffers[file_id], file_id);
- m_buffers[file_id] = NULL;
- m_write_requests.add(req);
- m_write_requests.m_cv.notify_one();
- }
-
- // Get a free buffer from the collection of free buffers.
- buffer_type* get_free_buffer() {
- std::unique_lock<std::mutex> lk(m_free_buffers.m_mutex);
- while (m_free_buffers.empty())
- m_free_buffers.m_cv.wait(lk);
- buffer_type *ret = m_free_buffers.get();
- lk.unlock();
- return ret;
- }
-
- public:
- async_multi_stream_writer(std::uint64_t bufsize_per_file_in_bytes = (1UL << 20),
- std::uint64_t n_free_buffers = 4UL) {
- // Initialize basic parameters.
- // Works even with n_free_buffers == 0.
- m_bytes_written = 0;
- m_items_per_buf = std::max(1UL, bufsize_per_file_in_bytes / sizeof(value_type));
-
- // Initialize empty buffers.
- for (std::uint64_t j = 0; j < n_free_buffers; ++j)
- m_free_buffers.add(new buffer_type(m_items_per_buf));
-
- // Start the I/O thread.
- m_io_thread = new std::thread(async_io_thread_code<value_type>, this);
- }
-
- // The added file gets the next available ID (starting from 0).
- void add_file(std::string filename, std::string write_mode =
- std::string("w")) {
- m_buffers.push_back(new buffer_type(m_items_per_buf));
- m_files.push_back(utils::file_open_nobuf(filename, write_mode));
- }
-
- // Write value to i-th file.
- void write_to_ith_file(std::uint64_t i, value_type value) {
- m_bytes_written += sizeof(value_type);
- m_buffers[i]->m_content[m_buffers[i]->m_filled++] = value;
- if (m_buffers[i]->full()) {
- issue_write_request(i);
- m_buffers[i] = get_free_buffer();
- }
- }
-
- // Write values[0..length) to i-th file.
- void write_to_ith_file(std::uint64_t i, const value_type *values, std::uint64_t length) {
- m_bytes_written += length * sizeof(value_type);
- while (length > 0) {
- std::uint64_t towrite = std::min(length, m_items_per_buf - m_buffers[i]->m_filled);
- std::copy(values, values + towrite, m_buffers[i]->m_content + m_buffers[i]->m_filled);
- m_buffers[i]->m_filled += towrite;
- length -= towrite;
- values += towrite;
- if (m_buffers[i]->full()) {
- issue_write_request(i);
- m_buffers[i] = get_free_buffer();
- }
- }
- }
-
- // Return performed I/O in bytes.
- inline std::uint64_t bytes_written() const {
- return m_bytes_written;
- }
-
- // Destructor.
- ~async_multi_stream_writer() {
- // Flush all buffers.
- std::uint64_t n_buffers = m_buffers.size();
- for (std::uint64_t file_id = 0; file_id < n_buffers; ++file_id) {
- if (!(m_buffers[file_id]->empty()))
- issue_write_request(file_id);
- }
-
- // Let the I/O thread know that there
- // won't be any more requests.
- std::unique_lock<std::mutex> lk(m_write_requests.m_mutex);
- m_write_requests.m_no_more_requests = true;
- lk.unlock();
- m_write_requests.m_cv.notify_one();
-
- // Wait for the I/O thread to finish.
- m_io_thread->join();
- delete m_io_thread;
-
- // Delete buffers and close files.
- for (std::uint64_t file_id = 0; file_id < n_buffers; ++file_id) {
- delete m_buffers[file_id]; // Can be NULL
- std::fclose(m_files[file_id]);
- }
-
- // Delete free buffers.
- while (!(m_free_buffers.empty())) {
- buffer_type *buf = m_free_buffers.get();
- delete buf;
- }
- }
-};
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_file_writer.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_file_writer.hpp
deleted file mode 100644
index 726e6193..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_file_writer.hpp
+++ /dev/null
@@ -1,281 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/io/async_multipart_file_writer.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_FILE_WRITER_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_FILE_WRITER_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <queue>
-#include <string>
-#include <algorithm>
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-
-#include "../utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename value_type>
-class async_multipart_file_writer {
- private:
- template<typename T>
- struct buffer {
- buffer(std::uint64_t size) {
- m_size = size;
- m_content = (T *)malloc(m_size * sizeof(T));
- m_filled = 0;
- }
-
- ~buffer() {
- free(m_content);
- }
-
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; }
- inline std::uint64_t free_space() const { return m_size - m_filled; }
-
- inline bool empty() const { return m_filled == 0; }
- inline bool full() const { return m_filled == m_size; }
-
- T *m_content;
- std::uint64_t m_size;
- std::uint64_t m_filled;
- };
-
- template<typename buffer_type>
- struct buffer_queue {
- buffer_queue(std::uint64_t n_buffers = 0, std::uint64_t items_per_buf = 0) {
- m_signal_stop = false;
- for (std::uint64_t i = 0; i < n_buffers; ++i)
- m_queue.push(new buffer_type(items_per_buf));
- }
-
- ~buffer_queue() {
- while (!m_queue.empty()) {
- buffer_type *buf = m_queue.front();
- m_queue.pop();
- delete buf;
- }
- }
-
- buffer_type *pop() {
- buffer_type *ret = m_queue.front();
- m_queue.pop();
- return ret;
- }
-
- void push(buffer_type *buf) {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_queue.push(buf);
- }
-
- void send_stop_signal() {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_signal_stop = true;
- }
-
- inline bool empty() const { return m_queue.empty(); }
-
- std::queue<buffer_type*> m_queue; // Must have FIFO property
- std::condition_variable m_cv;
- std::mutex m_mutex;
- bool m_signal_stop;
- };
-
- private:
- typedef buffer<value_type> buffer_type;
- typedef buffer_queue<buffer_type> buffer_queue_type;
-
- buffer_queue_type *m_empty_buffers;
- buffer_queue_type *m_full_buffers;
-
- private:
- template<typename T>
- static void io_thread_code(async_multipart_file_writer<T> *caller) {
- typedef buffer<T> buffer_type;
- while (true) {
- // Wait for the full buffer (or a stop signal).
- std::unique_lock<std::mutex> lk(caller->m_full_buffers->m_mutex);
- while (caller->m_full_buffers->empty() &&
- !(caller->m_full_buffers->m_signal_stop))
- caller->m_full_buffers->m_cv.wait(lk);
-
- if (caller->m_full_buffers->empty()) {
- // We received the stop signal -- exit.
- lk.unlock();
- break;
- }
-
- // Extract the buffer from the collection.
- buffer_type *buffer = caller->m_full_buffers->pop();
- lk.unlock();
-
- // Safely write the data to disk.
- const T *ptr = buffer->m_content;
- while (buffer->m_filled > 0) {
- if (caller->m_file == NULL || caller->m_cur_part_items_written == caller->m_single_part_max_items) {
- if (caller->m_file != NULL) {
- std::fclose(caller->m_file);
- ++caller->m_cur_part;
- } else caller->m_cur_part = 0;
- std::string cur_part_filename = caller->m_filename +
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part);
- caller->m_file = utils::file_open(cur_part_filename, "w");
- caller->m_cur_part_items_written = 0;
- }
-
- std::uint64_t cur_part_items_left = caller->m_single_part_max_items - caller->m_cur_part_items_written;
- std::uint64_t towrite = std::min(cur_part_items_left, buffer->m_filled);
- utils::write_to_file(ptr, towrite, caller->m_file);
- caller->m_cur_part_items_written += towrite;
- buffer->m_filled -= towrite;
- ptr += towrite;
- }
-
- // Add the (now empty) buffer to the collection
- // of empty buffers and notify the waiting thread.
- caller->m_empty_buffers->push(buffer);
- caller->m_empty_buffers->m_cv.notify_one();
- }
- }
-
- // Get a free buffer from the collection of free buffers.
- buffer_type* get_empty_buffer() {
- std::unique_lock<std::mutex> lk(m_empty_buffers->m_mutex);
- while (m_empty_buffers->empty())
- m_empty_buffers->m_cv.wait(lk);
- buffer_type *ret = m_empty_buffers->pop();
- lk.unlock();
- return ret;
- }
-
- private:
- std::FILE *m_file;
- std::string m_filename;
-
- std::uint64_t m_cur_part;
- std::uint64_t m_single_part_max_items;
- std::uint64_t m_cur_part_items_written;
- std::uint64_t m_bytes_written;
- std::uint64_t m_items_per_buf;
-
- buffer_type *m_cur_buffer;
- std::thread *m_io_thread;
-
- public:
- async_multipart_file_writer(std::string filename,
- std::uint64_t single_part_max_bytes,
- std::uint64_t total_buf_size_bytes = (8UL << 20),
- std::uint64_t n_buffers = 4UL) {
- m_filename = filename;
-
- // Initialize basic parameters. Note: if no items are
- // written, this class does not create any files.
- m_single_part_max_items = std::max(1UL, single_part_max_bytes / sizeof(value_type));
- m_file = NULL;
-
- // Allocate buffers.
- std::uint64_t total_buf_size_items = total_buf_size_bytes / sizeof(value_type);
- m_items_per_buf = std::max(1UL, total_buf_size_items / n_buffers);
- m_empty_buffers = new buffer_queue_type(n_buffers, m_items_per_buf);
- m_full_buffers = new buffer_queue_type();
-
- // Initialize empty buffer.
- m_cur_buffer = get_empty_buffer();
- m_bytes_written = 0;
-
- // Start the I/O thread.
- m_io_thread = new std::thread(io_thread_code<value_type>, this);
- }
-
- ~async_multipart_file_writer() {
- // Send the last incomplete buffer for writing.
- if (!(m_cur_buffer->empty())) {
- m_full_buffers->push(m_cur_buffer);
- m_full_buffers->m_cv.notify_one();
- m_cur_buffer = NULL;
- }
-
- // Let the I/O thread know that we're done.
- m_full_buffers->send_stop_signal();
- m_full_buffers->m_cv.notify_one();
-
- // Wait for the I/O thread to finish.
- m_io_thread->join();
-
- // Clean up.
- delete m_empty_buffers;
- delete m_full_buffers;
- delete m_io_thread;
- if (m_file != NULL)
- std::fclose(m_file);
- if (m_cur_buffer != NULL)
- delete m_cur_buffer;
- }
-
- inline void write(value_type x) {
- m_bytes_written += sizeof(value_type);
- m_cur_buffer->m_content[m_cur_buffer->m_filled++] = x;
- if (m_cur_buffer->full()) {
- m_full_buffers->push(m_cur_buffer);
- m_full_buffers->m_cv.notify_one();
- m_cur_buffer = get_empty_buffer();
- }
- }
-
- inline void write(const value_type *values, std::uint64_t length) {
- m_bytes_written += length * sizeof(value_type);
- while (length > 0) {
- std::uint64_t tocopy = std::min(length, m_cur_buffer->free_space());
- std::copy(values, values + tocopy, m_cur_buffer->m_content + m_cur_buffer->m_filled);
- m_cur_buffer->m_filled += tocopy;
- values += tocopy;
- length -= tocopy;
- if (m_cur_buffer->full()) {
- m_full_buffers->push(m_cur_buffer);
- m_full_buffers->m_cv.notify_one();
- m_cur_buffer = get_empty_buffer();
- }
- }
- }
-
- inline std::uint64_t bytes_written() const {
- return m_bytes_written;
- }
-};
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_FILE_WRITER_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp
deleted file mode 100644
index 55900d80..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/io/async_multipart_multifile_reader.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_MULTIFILE_READER_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_MULTIFILE_READER_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <queue>
-#include <string>
-#include <algorithm>
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-
-#include "../utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename value_type>
-class async_multipart_multifile_reader {
- private:
- template<typename T>
- struct buffer {
- buffer(std::uint64_t size) {
- m_size = size;
- m_content = (T *)malloc(m_size * sizeof(T));
- m_filled = 0;
- m_is_filled = false;
- }
-
- void read_from_file(std::FILE *f) {
- m_filled = std::fread(m_content, sizeof(T), m_size, f);
- }
-
- ~buffer() {
- free(m_content);
- }
-
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; }
- inline bool empty() const { return m_filled == 0; }
-
- T *m_content;
- std::uint64_t m_filled;
- std::uint64_t m_size;
- bool m_is_filled;
- };
-
- template<typename buffer_type>
- struct request {
- request(buffer_type *buffer, std::uint64_t file_id) {
- m_buffer = buffer;
- m_file_id = file_id;
- }
-
- buffer_type *m_buffer;
- std::uint64_t m_file_id;
- };
-
- template<typename request_type>
- struct request_queue {
- request_queue()
- : m_no_more_requests(false) {}
-
- request_type get() {
- request_type ret = m_requests.front();
- m_requests.pop();
- return ret;
- }
-
- inline void add(request_type request) {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_requests.push(request);
- }
-
- inline bool empty() const { return m_requests.empty(); }
-
- std::queue<request_type> m_requests;
- std::condition_variable m_cv;
- std::mutex m_mutex;
- bool m_no_more_requests;
- };
-
- private:
- template<typename T>
- static void async_io_thread_code(async_multipart_multifile_reader<T> *caller) {
- typedef buffer<T> buffer_type;
- typedef request<buffer_type> request_type;
- while (true) {
- // Wait for request or until 'no more requests' flag is set.
- std::unique_lock<std::mutex> lk(caller->m_read_requests.m_mutex);
- while (caller->m_read_requests.empty() &&
- !(caller->m_read_requests.m_no_more_requests))
- caller->m_read_requests.m_cv.wait(lk);
-
- if (caller->m_read_requests.empty() &&
- caller->m_read_requests.m_no_more_requests) {
- // No more requests -- exit.
- lk.unlock();
- break;
- }
-
- // Extract the buffer from the collection.
- request_type request = caller->m_read_requests.get();
- lk.unlock();
-
- // Process the request.
- if (caller->m_files[request.m_file_id] == NULL) {
- // Attempt to open and read from the file.
- std::string cur_part_filename = caller->m_filenames[request.m_file_id] +
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]);
- if (utils::file_exists(cur_part_filename)) {
- caller->m_files[request.m_file_id] = utils::file_open(cur_part_filename, "r");
- request.m_buffer->read_from_file(caller->m_files[request.m_file_id]);
- } else request.m_buffer->m_filled = 0;
- } else {
- request.m_buffer->read_from_file(caller->m_files[request.m_file_id]);
- if (request.m_buffer->empty()) {
- // Close and delete current file.
- std::fclose(caller->m_files[request.m_file_id]);
- caller->m_files[request.m_file_id] = NULL;
- std::string cur_part_filename = caller->m_filenames[request.m_file_id] +
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]);
- utils::file_delete(cur_part_filename);
-
- // Attempt to read from the next file.
- ++caller->m_cur_part[request.m_file_id];
- cur_part_filename = caller->m_filenames[request.m_file_id] +
- ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]);
- if (utils::file_exists(cur_part_filename)) {
- caller->m_files[request.m_file_id] = utils::file_open(cur_part_filename, "r");
- request.m_buffer->read_from_file(caller->m_files[request.m_file_id]);
- } else request.m_buffer->m_filled = 0;
- }
- }
- caller->m_bytes_read += request.m_buffer->size_in_bytes();
-
- // Update the status of the buffer
- // and notify the waiting thread.
- std::unique_lock<std::mutex> lk2(caller->m_mutexes[request.m_file_id]);
- request.m_buffer->m_is_filled = true;
- lk2.unlock();
- caller->m_cvs[request.m_file_id].notify_one();
- }
- }
-
- private:
- typedef buffer<value_type> buffer_type;
- typedef request<buffer_type> request_type;
-
- std::uint64_t m_bytes_read;
- std::uint64_t m_items_per_buf;
- std::uint64_t n_files;
- std::uint64_t m_files_added;
-
- std::FILE **m_files;
- std::string *m_filenames;
- std::uint64_t *m_cur_part;
-
- std::uint64_t *m_active_buffer_pos;
- buffer_type **m_active_buffers;
- buffer_type **m_passive_buffers;
- std::mutex *m_mutexes;
- std::condition_variable *m_cvs;
-
- request_queue<request_type> m_read_requests;
- std::thread *m_io_thread;
-
- private:
- void issue_read_request(std::uint64_t file_id) {
- request_type req(m_passive_buffers[file_id], file_id);
- m_read_requests.add(req);
- m_read_requests.m_cv.notify_one();
- }
-
- void receive_new_buffer(std::uint64_t file_id) {
- // Wait for the I/O thread to finish reading passive buffer.
- std::unique_lock<std::mutex> lk(m_mutexes[file_id]);
- while (m_passive_buffers[file_id]->m_is_filled == false)
- m_cvs[file_id].wait(lk);
-
- // Swap active and bassive buffers.
- std::swap(m_active_buffers[file_id], m_passive_buffers[file_id]);
- m_active_buffer_pos[file_id] = 0;
- m_passive_buffers[file_id]->m_is_filled = false;
- lk.unlock();
-
- // Issue the read request for the passive buffer.
- issue_read_request(file_id);
- }
-
- public:
- async_multipart_multifile_reader(std::uint64_t number_of_files,
- std::uint64_t buf_size_bytes = (1UL << 19)) {
- // Initialize basic parameters.
- n_files = number_of_files;
- m_files_added = 0;
- m_bytes_read = 0;
- m_items_per_buf = std::max(1UL, buf_size_bytes / sizeof(value_type));
-
- m_mutexes = new std::mutex[n_files];
- m_cvs = new std::condition_variable[n_files];
- m_active_buffer_pos = new std::uint64_t[n_files];
- m_files = new std::FILE*[n_files];
- m_filenames = new std::string[n_files];
- m_cur_part = new std::uint64_t[n_files];
- m_active_buffers = new buffer_type*[n_files];
- m_passive_buffers = new buffer_type*[n_files];
-
- for (std::uint64_t i = 0; i < n_files; ++i) {
- m_active_buffer_pos[i] = 0;
- m_active_buffers[i] = new buffer_type(m_items_per_buf);
- m_passive_buffers[i] = new buffer_type(m_items_per_buf);
- }
-
- m_io_thread = new std::thread(async_io_thread_code<value_type>, this);
- }
-
- // The added file gets the next available ID (file IDs start from 0).
- void add_file(std::string filename) {
- m_filenames[m_files_added] = filename;
- m_files[m_files_added] = NULL;
- m_cur_part[m_files_added] = 0;
- issue_read_request(m_files_added);
- ++m_files_added;
- }
-
- // Read from i-th file.
- value_type read_from_ith_file(std::uint64_t i) {
- if (m_active_buffer_pos[i] == m_active_buffers[i]->m_filled)
- receive_new_buffer(i);
- return m_active_buffers[i]->m_content[m_active_buffer_pos[i]++];
- }
-
- inline std::uint64_t bytes_read() const {
- return m_bytes_read;
- }
-
- ~async_multipart_multifile_reader() {
- // Let the I/O thread know that there
- // won't be any more requests.
- std::unique_lock<std::mutex> lk(m_read_requests.m_mutex);
- m_read_requests.m_no_more_requests = true;
- lk.unlock();
- m_read_requests.m_cv.notify_one();
-
- // Wait for the I/O to finish.
- m_io_thread->join();
- delete m_io_thread;
-
- // Delete buffers.
- for (std::uint64_t i = 0; i < n_files; ++i) {
- delete m_active_buffers[i];
- delete m_passive_buffers[i];
- }
-
- // Rest of the cleanup.
- delete[] m_active_buffers;
- delete[] m_passive_buffers;
- delete[] m_mutexes;
- delete[] m_cvs;
- delete[] m_active_buffer_pos;
- delete[] m_files;
- delete[] m_filenames;
- delete[] m_cur_part;
- }
-};
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_MULTIPART_MULTIFILE_READER_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_reader.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_reader.hpp
deleted file mode 100644
index 354a491d..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_reader.hpp
+++ /dev/null
@@ -1,355 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/io/async_stream_reader.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <queue>
-#include <string>
-#include <algorithm>
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-
-#include "../utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename value_type>
-class async_stream_reader {
- private:
- template<typename T>
- struct buffer {
- buffer(std::uint64_t size) {
- m_size = size;
- m_content = (T *)malloc(m_size * sizeof(T));
- m_filled = 0;
- }
-
- void read_from_file(std::FILE *f) {
- m_filled = std::fread(m_content, sizeof(T), m_size, f);
- }
-
- std::uint64_t size_in_bytes() const {
- return sizeof(T) * m_filled;
- }
-
- ~buffer() {
- free(m_content);
- }
-
- T *m_content;
- std::uint64_t m_size;
- std::uint64_t m_filled;
- };
-
- template<typename buffer_type>
- struct buffer_queue {
- buffer_queue(std::uint64_t n_buffers = 0, std::uint64_t items_per_buf = 0) {
- m_signal_stop = false;
- for (std::uint64_t i = 0; i < n_buffers; ++i)
- m_queue.push(new buffer_type(items_per_buf));
- }
-
- ~buffer_queue() {
- while (!m_queue.empty()) {
- buffer_type *buf = m_queue.front();
- m_queue.pop();
- delete buf;
- }
- }
-
- buffer_type *pop() {
- buffer_type *ret = m_queue.front();
- m_queue.pop();
- return ret;
- }
-
- void push(buffer_type *buf) {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_queue.push(buf);
- }
-
- void send_stop_signal() {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_signal_stop = true;
- }
-
- inline bool empty() const { return m_queue.empty(); }
-
- std::queue<buffer_type*> m_queue; // Must have FIFO property
- std::condition_variable m_cv;
- std::mutex m_mutex;
- bool m_signal_stop;
- };
-
- private:
- typedef buffer<value_type> buffer_type;
- typedef buffer_queue<buffer_type> buffer_queue_type;
-
- buffer_queue_type *m_empty_buffers;
- buffer_queue_type *m_full_buffers;
-
- private:
- template<typename T>
- static void io_thread_code(async_stream_reader<T> *caller) {
- typedef buffer<T> buffer_type;
- while (true) {
- // Wait for an empty buffer (or a stop signal).
- std::unique_lock<std::mutex> lk(caller->m_empty_buffers->m_mutex);
- while (caller->m_empty_buffers->empty() &&
- !(caller->m_empty_buffers->m_signal_stop))
- caller->m_empty_buffers->m_cv.wait(lk);
-
- if (caller->m_empty_buffers->empty()) {
- // We received the stop signal -- exit.
- lk.unlock();
- break;
- }
-
- // Extract the buffer from the queue.
- buffer_type *buffer = caller->m_empty_buffers->pop();
- lk.unlock();
-
- // Read the data from disk.
- buffer->read_from_file(caller->m_file);
- caller->m_bytes_read += buffer->size_in_bytes();
-
- // Check if we reached the end of file.
- bool end_of_file = false;
- if (buffer->m_filled < buffer->m_size)
- end_of_file = true;
-
- if (buffer->m_filled > 0) {
- // Add the buffer to the queue of filled buffers.
- caller->m_full_buffers->push(buffer);
- caller->m_full_buffers->m_cv.notify_one();
- } else {
- // Reinsert into the queue of empty buffers.
- caller->m_empty_buffers->push(buffer);
- }
-
- // If we reached the end of file -- exit.
- if (end_of_file == true) {
- caller->m_full_buffers->send_stop_signal();
- caller->m_full_buffers->m_cv.notify_one();
- break;
- }
- }
- }
-
- public:
- void receive_new_buffer() {
- // Push the current buffer back to the poll of empty buffers.
- if (m_cur_buffer != NULL) {
- m_empty_buffers->push(m_cur_buffer);
- m_empty_buffers->m_cv.notify_one();
- m_cur_buffer = NULL;
- }
-
- // Extract a filled buffer.
- std::unique_lock<std::mutex> lk(m_full_buffers->m_mutex);
- while (m_full_buffers->empty() && !(m_full_buffers->m_signal_stop))
- m_full_buffers->m_cv.wait(lk);
- m_cur_buffer_pos = 0;
- if (m_full_buffers->empty()) {
- lk.unlock();
- m_cur_buffer_filled = 0;
- } else {
- m_cur_buffer = m_full_buffers->pop();
- lk.unlock();
- m_cur_buffer_filled = m_cur_buffer->m_filled;
- }
- }
-
- private:
- std::FILE *m_file;
- std::uint64_t m_bytes_read;
- std::uint64_t m_cur_buffer_pos;
- std::uint64_t m_cur_buffer_filled;
- buffer_type *m_cur_buffer;
- std::thread *m_io_thread;
-
- public:
- // Default constructor, reads from stdin.
- async_stream_reader() {
- init("", (8UL << 20), 4UL, 0UL);
- }
-
- // Constructor, default buffer sizes, no skip.
- async_stream_reader(std::string filename) {
- init(filename, (8UL << 20), 4UL, 0UL);
- }
-
- // Constructor, default buffer sizes, given skip.
- async_stream_reader(std::string filename,
- std::uint64_t n_skip_bytes) {
- init(filename, (8UL << 20), 4UL, n_skip_bytes);
- }
-
- // Constructor, no skip, given buffer sizes.
- async_stream_reader(std::string filename,
- std::uint64_t total_buf_size_bytes,
- std::uint64_t n_buffers) {
- init(filename, total_buf_size_bytes, n_buffers, 0UL);
- }
-
- // Constructor, given buffer sizes and skip.
- async_stream_reader(std::string filename,
- std::uint64_t total_buf_size_bytes,
- std::uint64_t n_buffers,
- std::uint64_t n_skip_bytes) {
- init(filename, total_buf_size_bytes, n_buffers, n_skip_bytes);
- }
-
- // Main initializing function.
- void init(std::string filename,
- std::uint64_t total_buf_size_bytes,
- std::uint64_t n_buffers,
- std::uint64_t n_skip_bytes) {
- if (filename.empty()) m_file = stdin;
- else m_file = utils::file_open_nobuf(filename.c_str(), "r");
-
- if (m_file != stdin && n_skip_bytes > 0)
- std::fseek(m_file, n_skip_bytes, SEEK_SET);
-
- // Initialize counters.
- m_bytes_read = 0;
- m_cur_buffer_pos = 0;
- m_cur_buffer_filled = 0;
- m_cur_buffer = NULL;
-
- // Allocate buffers.
- std::uint64_t total_buf_size_items = total_buf_size_bytes / sizeof(value_type);
- std::uint64_t items_per_buf = std::max(1UL, total_buf_size_items / n_buffers);
- m_empty_buffers = new buffer_queue_type(n_buffers, items_per_buf);
- m_full_buffers = new buffer_queue_type();
-
- // Start the I/O thread.
- m_io_thread = new std::thread(io_thread_code<value_type>, this);
- }
-
- // Return the next item in the stream.
- inline value_type read() {
- if (m_cur_buffer_pos == m_cur_buffer_filled)
- receive_new_buffer();
-
- return m_cur_buffer->m_content[m_cur_buffer_pos++];
- }
-
- // Read 'howmany' items into 'dest'.
- void read(value_type *dest, std::uint64_t howmany) {
- while (howmany > 0) {
- if (m_cur_buffer_pos == m_cur_buffer_filled)
- receive_new_buffer();
-
- std::uint64_t cur_buf_left = m_cur_buffer_filled - m_cur_buffer_pos;
- std::uint64_t tocopy = std::min(howmany, cur_buf_left);
- for (std::uint64_t i = 0; i < tocopy; ++i)
- dest[i] = m_cur_buffer->m_content[m_cur_buffer_pos + i];
- m_cur_buffer_pos += tocopy;
- dest += tocopy;
- howmany -= tocopy;
- }
- }
-
- // Skip the next 'howmany' items in the stream.
- void skip(std::uint64_t howmany) {
- while (howmany > 0) {
- if (m_cur_buffer_pos == m_cur_buffer_filled)
- receive_new_buffer();
-
- std::uint64_t toskip = std::min(howmany, m_cur_buffer_filled - m_cur_buffer_pos);
- m_cur_buffer_pos += toskip;
- howmany -= toskip;
- }
- }
-
- // Return the next item in the stream.
- inline value_type peek() {
- if (m_cur_buffer_pos == m_cur_buffer_filled)
- receive_new_buffer();
-
- return m_cur_buffer->m_content[m_cur_buffer_pos];
- }
-
- // True iff there are no more items in the stream.
- inline bool empty() {
- if (m_cur_buffer_pos == m_cur_buffer_filled)
- receive_new_buffer();
-
- return (m_cur_buffer_pos == m_cur_buffer_filled);
- }
-
- // Return const ptr to internal buffer.
- const value_type *get_buf_ptr() const {
- return m_cur_buffer->m_content;
- }
-
- // Return the number of items in the internal buffer.
- std::uint64_t get_buf_filled() const {
- return m_cur_buffer_filled;
- }
-
- // Performed I/O in bytes.
- inline std::uint64_t bytes_read() const {
- return m_bytes_read;
- }
-
- // Destructor.
- ~async_stream_reader() {
- // Let the I/O thread know that we're done.
- m_empty_buffers->send_stop_signal();
- m_empty_buffers->m_cv.notify_one();
-
- // Wait for the thread to finish.
- m_io_thread->join();
-
- // Clean up.
- delete m_empty_buffers;
- delete m_full_buffers;
- delete m_io_thread;
- if (m_file != stdin)
- std::fclose(m_file);
-
- if (m_cur_buffer != NULL)
- delete m_cur_buffer;
- }
-};
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_writer.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_writer.hpp
deleted file mode 100644
index da5b853f..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/io/async_stream_writer.hpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/io/async_stream_writer.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <queue>
-#include <string>
-#include <algorithm>
-#include <condition_variable>
-#include <mutex>
-#include <thread>
-
-#include "../utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-template<typename value_type>
-class async_stream_writer {
- private:
- template<typename T>
- struct buffer {
- buffer(std::uint64_t size) {
- m_size = size;
- m_content = (T *)malloc(m_size * sizeof(T));
- m_filled = 0;
- }
-
- void write_to_file(std::FILE *f) {
- utils::write_to_file(m_content, m_filled, f);
- m_filled = 0;
- }
-
- ~buffer() {
- free(m_content);
- }
-
- inline bool empty() const { return m_filled == 0; }
- inline bool full() const { return m_filled == m_size; }
- inline std::uint64_t size_in_bytes() const { return sizeof(T) * m_filled; }
- inline std::uint64_t free_space() const { return m_size - m_filled; }
-
- T *m_content;
- std::uint64_t m_size;
- std::uint64_t m_filled;
- };
-
- template<typename buffer_type>
- struct buffer_queue {
- buffer_queue(std::uint64_t n_buffers = 0, std::uint64_t items_per_buf = 0) {
- m_signal_stop = false;
- for (std::uint64_t i = 0; i < n_buffers; ++i)
- m_queue.push(new buffer_type(items_per_buf));
- }
-
- ~buffer_queue() {
- while (!m_queue.empty()) {
- buffer_type *buf = m_queue.front();
- m_queue.pop();
- delete buf;
- }
- }
-
- buffer_type *pop() {
- buffer_type *ret = m_queue.front();
- m_queue.pop();
- return ret;
- }
-
- void push(buffer_type *buf) {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_queue.push(buf);
- }
-
- void send_stop_signal() {
- std::lock_guard<std::mutex> lk(m_mutex);
- m_signal_stop = true;
- }
-
- inline bool empty() const { return m_queue.empty(); }
-
- std::queue<buffer_type*> m_queue; // Must have FIFO property
- std::condition_variable m_cv;
- std::mutex m_mutex;
- bool m_signal_stop;
- };
-
- private:
- typedef buffer<value_type> buffer_type;
- typedef buffer_queue<buffer_type> buffer_queue_type;
-
- buffer_queue_type *m_empty_buffers;
- buffer_queue_type *m_full_buffers;
-
- private:
- template<typename T>
- static void io_thread_code(async_stream_writer<T> *caller) {
- typedef buffer<T> buffer_type;
- while (true) {
- // Wait for the full buffer (or a stop signal).
- std::unique_lock<std::mutex> lk(caller->m_full_buffers->m_mutex);
- while (caller->m_full_buffers->empty() &&
- !(caller->m_full_buffers->m_signal_stop))
- caller->m_full_buffers->m_cv.wait(lk);
-
- if (caller->m_full_buffers->empty()) {
- // We received the stop signal -- exit.
- lk.unlock();
- break;
- }
-
- // Extract the buffer from the collection.
- buffer_type *buffer = caller->m_full_buffers->pop();
- lk.unlock();
-
- // Write the data to disk.
- buffer->write_to_file(caller->m_file);
-
- // Add the (now empty) buffer to the collection
- // of empty buffers and notify the waiting thread.
- caller->m_empty_buffers->push(buffer);
- caller->m_empty_buffers->m_cv.notify_one();
- }
- }
-
- // Get a free buffer from the collection of free buffers.
- buffer_type* get_empty_buffer() {
- std::unique_lock<std::mutex> lk(m_empty_buffers->m_mutex);
- while (m_empty_buffers->empty())
- m_empty_buffers->m_cv.wait(lk);
- buffer_type *ret = m_empty_buffers->pop();
- lk.unlock();
- return ret;
- }
-
- private:
- std::FILE *m_file;
-
- std::uint64_t m_bytes_written;
- std::uint64_t m_items_per_buf;
-
- buffer_type *m_cur_buffer;
- std::thread *m_io_thread;
-
- public:
- async_stream_writer(std::string filename = std::string(""),
- std::uint64_t total_buf_size_bytes = (8UL << 20),
- std::uint64_t n_buffers = 4UL,
- std::string write_mode = std::string("w")) {
- if (filename.empty()) m_file = stdout;
- else m_file = utils::file_open_nobuf(filename.c_str(), write_mode);
-
- // Allocate buffers.
- std::uint64_t total_buf_size_items = total_buf_size_bytes / sizeof(value_type);
- m_items_per_buf = std::max(1UL, total_buf_size_items / n_buffers);
- m_empty_buffers = new buffer_queue_type(n_buffers, m_items_per_buf);
- m_full_buffers = new buffer_queue_type();
-
- // Initialize empty buffer.
- m_cur_buffer = get_empty_buffer();
- m_bytes_written = 0;
-
- // Start the I/O thread.
- m_io_thread = new std::thread(io_thread_code<value_type>, this);
- }
-
- // Write item x to the stream.
- inline void write(value_type x) {
- m_bytes_written += sizeof(value_type);
- m_cur_buffer->m_content[m_cur_buffer->m_filled++] = x;
- if (m_cur_buffer->full()) {
- m_full_buffers->push(m_cur_buffer);
- m_full_buffers->m_cv.notify_one();
- m_cur_buffer = get_empty_buffer();
- }
- }
-
- // Write values[0..length) to the stream.
- inline void write(const value_type *values, std::uint64_t length) {
- m_bytes_written += length * sizeof(value_type);
- while (length > 0) {
- std::uint64_t tocopy = std::min(length, m_cur_buffer->free_space());
- std::copy(values, values + tocopy, m_cur_buffer->m_content + m_cur_buffer->m_filled);
- m_cur_buffer->m_filled += tocopy;
- values += tocopy;
- length -= tocopy;
- if (m_cur_buffer->full()) {
- m_full_buffers->push(m_cur_buffer);
- m_full_buffers->m_cv.notify_one();
- m_cur_buffer = get_empty_buffer();
- }
- }
- }
-
- // Return performed I/O in bytes.
- inline std::uint64_t bytes_written() const {
- return m_bytes_written;
- }
-
- // Destructor.
- ~async_stream_writer() {
- // Send the last incomplete buffer for writing.
- if (!(m_cur_buffer->empty())) {
- m_full_buffers->push(m_cur_buffer);
- m_full_buffers->m_cv.notify_one();
- m_cur_buffer = NULL;
- }
-
- // Let the I/O thread know that we're done.
- m_full_buffers->send_stop_signal();
- m_full_buffers->m_cv.notify_one();
-
- // Wait for the I/O thread to finish.
- m_io_thread->join();
-
- // Delete buffers and close the file.
- delete m_empty_buffers;
- delete m_full_buffers;
- delete m_io_thread;
-
- if (m_file != stdout)
- std::fclose(m_file);
-
- if (m_cur_buffer != NULL)
- delete m_cur_buffer;
- }
-};
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/process_halfsegment_pairs.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/process_halfsegment_pairs.hpp
deleted file mode 100644
index c6a17fa5..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/process_halfsegment_pairs.hpp
+++ /dev/null
@@ -1,567 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/process_halfsegment_pairs.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_PROCESS_HALFSEGMENT_PAIRS_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_PROCESS_HALFSEGMENT_PAIRS_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdint>
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <omp.h>
-
-#include "io/async_stream_reader.hpp"
-#include "io/async_stream_writer.hpp"
-#include "io/async_multi_stream_writer.hpp"
-#include "utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-
-std::uint64_t naive_lcp(std::uint64_t i, std::uint64_t j, std::uint64_t lcp,
- std::FILE *f_text, std::uint64_t text_length, std::uint64_t &io_volume) {
- static const std::uint64_t bufsize = (1L << 20);
- std::uint8_t *b1 = new std::uint8_t[bufsize];
- std::uint8_t *b2 = new std::uint8_t[bufsize];
- std::uint64_t io_vol = 0;
- while (true) {
- std::uint64_t toread = std::min(bufsize, text_length - std::max(i, j) - lcp);
- if (!toread) break;
- utils::read_at_offset(b1, i + lcp, toread, f_text);
- utils::read_at_offset(b2, j + lcp, toread, f_text);
- io_vol += 2UL * toread;
- std::uint64_t lcp_delta = 0;
- while (lcp_delta < toread && b1[lcp_delta] == b2[lcp_delta])
- ++lcp_delta;
- lcp += lcp_delta;
- if (lcp_delta < toread)
- break;
- }
- delete[] b1;
- delete[] b2;
- io_volume += io_vol;
- return lcp;
-}
-
-struct buf_item_ext {
- std::uint64_t m_left_idx;
- std::uint64_t m_right_idx;
- std::uint64_t m_ans;
- std::uint64_t m_block_id;
-};
-
-template<typename text_offset_type, typename ext_text_offset_type>
-std::uint64_t process_halfsegment_pairs(std::string text_filename,
- std::uint64_t text_length, std::uint64_t max_block_size_B,
- std::uint64_t max_halfsegment_size, std::uint64_t max_overflow_size,
- std::string **pairs_filenames, std::string *irreducible_bits_filenames,
- std::uint64_t &total_io_volume) {
- fprintf(stderr, " Compute irreducible LCP values:\n");
- long double start = utils::wclock();
-
- // Initialize basic parameters.
- std::uint64_t n_blocks_B = (2UL * text_length + max_block_size_B - 1) / max_block_size_B;
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size;
- std::uint64_t sum_irreducible_lcps = 0;
-
- // Open file with text.
- std::FILE *f_text = utils::file_open(text_filename, "r");
-
- // Initialize multiwriter of values 2i + PLCP[i].
- typedef async_multi_stream_writer<ext_text_offset_type> lcp_multiwriter_type;
- lcp_multiwriter_type *lcp_multiwriter = NULL;
- {
- static const std::uint64_t n_free_buffers = 4;
- std::uint64_t buffer_size = (1UL << 20);
- lcp_multiwriter = new lcp_multiwriter_type(buffer_size, n_free_buffers);
- for (std::uint64_t block_id = 0; block_id < n_blocks_B; ++block_id)
- lcp_multiwriter->add_file(irreducible_bits_filenames[block_id]);
- }
-
- // Allocate halfsegments.
- std::uint8_t *left_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size);
- std::uint8_t *right_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size);
-
- // Allocate buffers.
- static const std::uint64_t local_buf_size = (1UL << 20);
- text_offset_type *idx_buf = new text_offset_type[local_buf_size * 2];
-#ifdef _OPENMP
- buf_item_ext *ans_buf = new buf_item_ext[local_buf_size];
-#endif
-
- // Processing of halfsegment pairs follows.
- for (std::uint64_t left_halfsegment_id = 0; left_halfsegment_id < n_halfsegments; ++left_halfsegment_id) {
- std::uint64_t left_halfsegment_beg = left_halfsegment_id * max_halfsegment_size;
- std::uint64_t left_halfsegment_end = std::min(left_halfsegment_beg + max_halfsegment_size, text_length);
- std::uint64_t left_halfsegment_ext_end = std::min(left_halfsegment_end + max_overflow_size, text_length);
- std::uint64_t left_halfsegment_ext_size = left_halfsegment_ext_end - left_halfsegment_beg;
- bool left_halfsegment_loaded = false;
-
- // Scan all halfsegments to the right of left_halfsegment_id.
- for (std::uint64_t right_halfsegment_id = left_halfsegment_id; right_halfsegment_id < n_halfsegments; right_halfsegment_id++) {
- std::uint64_t right_halfsegment_beg = right_halfsegment_id * max_halfsegment_size;
- std::uint64_t right_halfsegment_end = std::min(right_halfsegment_beg + max_halfsegment_size, text_length);
- std::uint64_t right_halfsegment_ext_end = std::min(right_halfsegment_end + max_overflow_size, text_length);
- std::uint64_t right_halfsegment_ext_size = right_halfsegment_ext_end - right_halfsegment_beg;
-
- // Check if that pair of halfsegments has any associated pairs.
- std::string pairs_filename = pairs_filenames[left_halfsegment_id][right_halfsegment_id];
- if (utils::file_exists(pairs_filename) == false || utils::file_size(pairs_filename) == 0) {
- if (utils::file_exists(pairs_filename))
- utils::file_delete(pairs_filename);
- continue;
- }
-
- // Print initial progress message.
- fprintf(stderr, " Process halfsegments %lu and %lu: ", left_halfsegment_id, right_halfsegment_id);
- long double halfsegment_process_start = utils::wclock();
- std::uint64_t local_lcp_sum = 0;
- std::uint64_t extra_io = 0;
- std::uint64_t io_vol = 0;
-
- // Initialize reading from file associated with current pair of halfsegments.
- typedef async_stream_reader<text_offset_type> pair_reader_type;
- std::uint64_t n_pairs = utils::file_size(pairs_filename) / (2 * sizeof(text_offset_type));
- pair_reader_type *pair_reader = new pair_reader_type(pairs_filename);
-
- // Read left halfsegment from disk (if it wasn't already)
- if (left_halfsegment_loaded == false) {
- utils::read_at_offset(left_halfsegment, left_halfsegment_beg, left_halfsegment_ext_size, text_filename);
- left_halfsegment_loaded = true;
- extra_io += left_halfsegment_ext_size;
- }
-
- // Read right halfsegment from disk.
- std::uint8_t *right_halfsegment_ptr = right_halfsegment;
- if (right_halfsegment_id != left_halfsegment_id) {
- utils::read_at_offset(right_halfsegment, right_halfsegment_beg, right_halfsegment_ext_size, text_filename);
- extra_io += right_halfsegment_ext_size;
- } else right_halfsegment_ptr = left_halfsegment;
-
- std::uint64_t pairs_processed = 0;
- while (pairs_processed < n_pairs) {
- std::uint64_t filled = std::min(n_pairs - pairs_processed, local_buf_size);
- pair_reader->read(idx_buf, filled * 2);
-
-#ifdef _OPENMP
- std::vector<std::uint64_t> long_lcps;
- std::uint64_t max_threads = omp_get_max_threads();
- std::uint64_t max_block_size = (filled + max_threads - 1) / max_threads;
- std::uint64_t n_threads = (filled + max_block_size - 1) / max_block_size;
- #pragma omp parallel num_threads(n_threads)
- {
- std::uint64_t thread_id = omp_get_thread_num();
- std::uint64_t block_beg = thread_id * max_block_size;
- std::uint64_t block_end = std::min(block_beg + max_block_size, filled);
- std::vector<std::uint64_t> local_long_lcps;
- std::uint64_t thread_lcp_sum = 0;
-
- for (std::uint64_t j = block_beg; j < block_end; ++j) {
- std::uint64_t i = idx_buf[2 * j];
- std::uint64_t phi_i = idx_buf[2 * j + 1];
- std::uint64_t left_idx = i;
- std::uint64_t right_idx = phi_i;
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end &&
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end))
- std::swap(left_idx, right_idx);
-
- // Compute LCP value.
- std::uint64_t lcp = 0;
- while (left_idx + lcp < left_halfsegment_ext_end &&
- right_idx + lcp < right_halfsegment_ext_end &&
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] ==
- right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp])
- ++lcp;
-
- // If the LCP computation cannot be completed, add it to the list of unfinished LCPs.
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) ||
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length)) {
- ans_buf[j].m_left_idx = left_idx;
- ans_buf[j].m_right_idx = right_idx;
- ans_buf[j].m_ans = lcp;
- local_long_lcps.push_back(j);
- } else {
- std::uint64_t pos_in_B = 2UL * i + lcp;
- std::uint64_t block_id = pos_in_B / max_block_size_B;
- ans_buf[j].m_ans = pos_in_B;
- ans_buf[j].m_block_id = block_id;
- thread_lcp_sum += lcp;
- }
- }
-
- #pragma omp critical
- {
- // Concatenate the list of long LCP processed by a given thread with a global list.
- long_lcps.insert(long_lcps.end(), local_long_lcps.begin(), local_long_lcps.end());
- local_lcp_sum += thread_lcp_sum;
- }
- }
-
- // Finish the computatino of long LCPs using naive method.
- for (std::uint64_t j = 0; j < long_lcps.size(); ++j) {
- std::uint64_t which = long_lcps[j];
-
- // Retreive indexes from the buffer.
- std::uint64_t i = idx_buf[2 * which];
- std::uint64_t left_idx = ans_buf[which].m_left_idx;
- std::uint64_t right_idx = ans_buf[which].m_right_idx;
- std::uint64_t lcp = ans_buf[which].m_ans;
-
- // Compute LCP.
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol);
-
- // Compute answer.
- std::uint64_t pos_in_B = 2UL * i + lcp;
- std::uint64_t block_id = pos_in_B / max_block_size_B;
-
- // Write answer to buffer.
- ans_buf[which].m_ans = pos_in_B;
- ans_buf[which].m_block_id = block_id;
-
- // Update stats.
- local_lcp_sum += lcp;
- }
-
- // Write LCPs to file.
- for (std::uint64_t j = 0; j < filled; ++j)
- lcp_multiwriter->write_to_ith_file(ans_buf[j].m_block_id, ans_buf[j].m_ans);
-
-#else
- for (std::uint64_t j = 0; j < filled; ++j) {
- std::uint64_t i = idx_buf[2 * j];
- std::uint64_t phi_i = idx_buf[2 * j + 1];
- std::uint64_t left_idx = i;
- std::uint64_t right_idx = phi_i;
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end &&
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end))
- std::swap(left_idx, right_idx);
-
- // Compute LCP value.
- std::uint64_t lcp = 0;
- while (left_idx + lcp < left_halfsegment_ext_end && right_idx + lcp < right_halfsegment_ext_end &&
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] == right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp])
- ++lcp;
-
- // Finish the long LCP using naive method.
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) ||
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length))
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol);
-
- // Write LCP to file.
- std::uint64_t pos_in_B = 2 * i + lcp;
- std::uint64_t block_id = pos_in_B / max_block_size_B;
- lcp_multiwriter->write_to_ith_file(block_id, pos_in_B);
- local_lcp_sum += lcp;
- }
-#endif
-
- pairs_processed += filled;
- }
-
- // Update I/O volume.
- io_vol += pair_reader->bytes_read() + extra_io + n_pairs * sizeof(ext_text_offset_type);
- total_io_volume += io_vol;
-
- // Clean up.
- delete pair_reader;
- utils::file_delete(pairs_filename);
-
- // Update statistics.
- sum_irreducible_lcps += local_lcp_sum;
-
- // Print summary.
- long double avg_lcp = (long double)local_lcp_sum / (long double)std::max(1UL, n_pairs);
- long double elapsed = utils::wclock() - halfsegment_process_start;
- fprintf(stderr, "time = %.1Lfs, I/O = %.1LfMiB/s, avg_lcp = %.2Lf, total I/O vol = %.2Lfn\n", elapsed,
- (1.L * io_vol / (1L << 20)) / elapsed, avg_lcp, (1.L * total_io_volume) / text_length);
- }
- }
-
- // Clean up.
- delete[] idx_buf;
-#ifdef _OPENMP
- delete[] ans_buf;
-#endif
- delete lcp_multiwriter;
- std::fclose(f_text);
- free(left_halfsegment);
- free(right_halfsegment);
-
- // Print summary.
- long double total_time = utils::wclock() - start;
- fprintf(stderr, " Total time: %.2Lfs, total I/O vol = %.2Lfn\n",
- total_time, (1.L * total_io_volume) / text_length);
-
- return sum_irreducible_lcps;
-}
-
-struct buf_item {
- std::uint64_t m_left_idx;
- std::uint64_t m_right_idx;
- std::uint64_t m_ans;
-};
-
-template<typename text_offset_type, typename ext_text_offset_type>
-std::uint64_t process_halfsegment_pairs(std::string text_filename,
- std::uint64_t text_length, std::uint64_t max_halfsegment_size,
- std::uint64_t max_overflow_size, std::string **pairs_filenames,
- std::string output_filename, std::uint64_t &total_io_volume) {
- fprintf(stderr, " Compute irreducible LCP values:\n");
- long double start = utils::wclock();
-
- // Initialize basic parameters.
- std::uint64_t n_halfsegments = (text_length + max_halfsegment_size - 1) / max_halfsegment_size;
- std::uint64_t sum_irreducible_lcps = 0;
-
- // Open file with text.
- std::FILE *f_text = utils::file_open(text_filename, "r");
-
- // Initialize writer of values 2i + PLCP[i].
- typedef async_stream_writer<ext_text_offset_type> lcp_writer_type;
- lcp_writer_type *lcp_writer = new lcp_writer_type(output_filename);
-
- // Allocate halfsegments.
- std::uint8_t *left_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size);
- std::uint8_t *right_halfsegment = (std::uint8_t *)malloc(max_halfsegment_size + max_overflow_size);
-
- // Allocate buffers.
- static const std::uint64_t local_buf_size = (1UL << 20);
- text_offset_type *idx_buf = new text_offset_type[local_buf_size * 2];
-#ifdef _OPENMP
- buf_item *ans_buf = new buf_item[local_buf_size];
-#endif
-
- // Processing of halfsegment pairs follows.
- for (std::uint64_t left_halfsegment_id = 0; left_halfsegment_id < n_halfsegments; ++left_halfsegment_id) {
- std::uint64_t left_halfsegment_beg = left_halfsegment_id * max_halfsegment_size;
- std::uint64_t left_halfsegment_end = std::min(left_halfsegment_beg + max_halfsegment_size, text_length);
- std::uint64_t left_halfsegment_ext_end = std::min(left_halfsegment_end + max_overflow_size, text_length);
- std::uint64_t left_halfsegment_ext_size = left_halfsegment_ext_end - left_halfsegment_beg;
- bool left_halfsegment_loaded = false;
-
- // Scan all halfsegments to the right of left_halfsegment_id.
- for (std::uint64_t right_halfsegment_id = left_halfsegment_id; right_halfsegment_id < n_halfsegments; right_halfsegment_id++) {
- std::uint64_t right_halfsegment_beg = right_halfsegment_id * max_halfsegment_size;
- std::uint64_t right_halfsegment_end = std::min(right_halfsegment_beg + max_halfsegment_size, text_length);
- std::uint64_t right_halfsegment_ext_end = std::min(right_halfsegment_end + max_overflow_size, text_length);
- std::uint64_t right_halfsegment_ext_size = right_halfsegment_ext_end - right_halfsegment_beg;
-
- // Check if that pair of halfsegments has any associated pairs.
- std::string pairs_filename = pairs_filenames[left_halfsegment_id][right_halfsegment_id];
- if (utils::file_exists(pairs_filename) == false || utils::file_size(pairs_filename) == 0) {
- if (utils::file_exists(pairs_filename))
- utils::file_delete(pairs_filename);
- continue;
- }
-
- // Print initial progress message.
- fprintf(stderr, " Process halfsegments %lu and %lu: ", left_halfsegment_id, right_halfsegment_id);
- long double halfsegment_process_start = utils::wclock();
- std::uint64_t local_lcp_sum = 0;
- std::uint64_t extra_io = 0;
- std::uint64_t io_vol = 0;
-
- // Initialize reading from file associated with current pair of halfsegments.
- typedef async_stream_reader<text_offset_type> pair_reader_type;
- std::uint64_t n_pairs = utils::file_size(pairs_filename) / (2 * sizeof(text_offset_type));
- pair_reader_type *pair_reader = new pair_reader_type(pairs_filename);
-
- // Read left halfsegment from disk (if it wasn't already)
- if (left_halfsegment_loaded == false) {
- utils::read_at_offset(left_halfsegment, left_halfsegment_beg, left_halfsegment_ext_size, text_filename);
- left_halfsegment_loaded = true;
- extra_io += left_halfsegment_ext_size;
- }
-
- // Read right halfsegment from disk.
- std::uint8_t *right_halfsegment_ptr = right_halfsegment;
- if (right_halfsegment_id != left_halfsegment_id) {
- utils::read_at_offset(right_halfsegment, right_halfsegment_beg, right_halfsegment_ext_size, text_filename);
- extra_io += right_halfsegment_ext_size;
- } else right_halfsegment_ptr = left_halfsegment;
-
- std::uint64_t pairs_processed = 0;
- while (pairs_processed < n_pairs) {
- std::uint64_t filled = std::min(n_pairs - pairs_processed, local_buf_size);
- pair_reader->read(idx_buf, filled * 2);
-
-#ifdef _OPENMP
- std::vector<std::uint64_t> long_lcps;
- std::uint64_t max_threads = omp_get_max_threads();
- std::uint64_t max_block_size = (filled + max_threads - 1) / max_threads;
- std::uint64_t n_threads = (filled + max_block_size - 1) / max_block_size;
- #pragma omp parallel num_threads(n_threads)
- {
- std::uint64_t thread_id = omp_get_thread_num();
- std::uint64_t block_beg = thread_id * max_block_size;
- std::uint64_t block_end = std::min(block_beg + max_block_size, filled);
- std::vector<std::uint64_t> local_long_lcps;
- std::uint64_t thread_lcp_sum = 0;
-
- for (std::uint64_t j = block_beg; j < block_end; ++j) {
- std::uint64_t i = idx_buf[2 * j];
- std::uint64_t phi_i = idx_buf[2 * j + 1];
- std::uint64_t left_idx = i;
- std::uint64_t right_idx = phi_i;
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end &&
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end))
- std::swap(left_idx, right_idx);
-
- // Compute LCP value.
- std::uint64_t lcp = 0;
- while (left_idx + lcp < left_halfsegment_ext_end &&
- right_idx + lcp < right_halfsegment_ext_end &&
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] ==
- right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp])
- ++lcp;
-
- // If the LCP computation cannot be completed, add it to the list of unfinished LCPs.
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) ||
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length)) {
- ans_buf[j].m_left_idx = left_idx;
- ans_buf[j].m_right_idx = right_idx;
- ans_buf[j].m_ans = lcp;
- local_long_lcps.push_back(j);
- } else {
- std::uint64_t pos_in_B = 2UL * i + lcp;
- ans_buf[j].m_ans = pos_in_B;
- thread_lcp_sum += lcp;
- }
- }
-
- #pragma omp critical
- {
- // Concatenate the list of long LCP processed by a given thread with a global list.
- long_lcps.insert(long_lcps.end(), local_long_lcps.begin(), local_long_lcps.end());
- local_lcp_sum += thread_lcp_sum;
- }
- }
-
- // Finish the computation of long LCPs using naive method.
- for (std::uint64_t j = 0; j < long_lcps.size(); ++j) {
- std::uint64_t which = long_lcps[j];
-
- // Retreive indexes from the buffer.
- std::uint64_t i = idx_buf[2 * which];
- std::uint64_t left_idx = ans_buf[which].m_left_idx;
- std::uint64_t right_idx = ans_buf[which].m_right_idx;
- std::uint64_t lcp = ans_buf[which].m_ans;
-
- // Compute LCP.
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol);
-
- // Compute answer.
- std::uint64_t pos_in_B = 2UL * i + lcp;
-
- // Write answer to buffer.
- ans_buf[which].m_ans = pos_in_B;
-
- // Update stats.
- local_lcp_sum += lcp;
- }
-
- // Write LCPs to file.
- for (std::uint64_t j = 0; j < filled; ++j)
- lcp_writer->write(ans_buf[j].m_ans);
-#else
- for (std::uint64_t j = 0; j < filled; ++j) {
- std::uint64_t i = idx_buf[2 * j];
- std::uint64_t phi_i = idx_buf[2 * j + 1];
- std::uint64_t left_idx = i;
- std::uint64_t right_idx = phi_i;
- if (!(left_halfsegment_beg <= left_idx && left_idx < left_halfsegment_end &&
- right_halfsegment_beg <= right_idx && right_idx < right_halfsegment_end))
- std::swap(left_idx, right_idx);
-
- // Compute LCP value.
- std::uint64_t lcp = 0;
- while (left_idx + lcp < left_halfsegment_ext_end && right_idx + lcp < right_halfsegment_ext_end &&
- left_halfsegment[left_idx - left_halfsegment_beg + lcp] == right_halfsegment_ptr[right_idx - right_halfsegment_beg + lcp])
- ++lcp;
-
- // Finish the computation of long LCP using naive method.
- if ((left_idx + lcp == left_halfsegment_ext_end && left_halfsegment_ext_end < text_length) ||
- (right_idx + lcp == right_halfsegment_ext_end && right_halfsegment_ext_end < text_length))
- lcp = naive_lcp(left_idx, right_idx, lcp, f_text, text_length, io_vol);
-
- // Write LCP to file.
- std::uint64_t pos_in_B = 2 * i + lcp;
- lcp_writer->write(pos_in_B);
- local_lcp_sum += lcp;
- }
-#endif
-
- pairs_processed += filled;
- }
-
- // Update I/O volume.
- io_vol += pair_reader->bytes_read() + extra_io + n_pairs * sizeof(ext_text_offset_type);
- total_io_volume += io_vol;
-
- // Clean up.
- delete pair_reader;
- utils::file_delete(pairs_filename);
-
- // Update statistics.
- sum_irreducible_lcps += local_lcp_sum;
-
- // Print summary.
- long double avg_lcp = (long double)local_lcp_sum / (long double)std::max(1UL, n_pairs);
- long double elapsed = utils::wclock() - halfsegment_process_start;
- fprintf(stderr, "time = %.1Lfs, I/O = %.1LfMiB/s, avg_lcp = %.2Lf, total I/O vol = %.2Lfn\n", elapsed,
- (1.L * io_vol / (1L << 20)) / elapsed, avg_lcp, (1.L * total_io_volume) / text_length);
- }
- }
-
- // Clean up.
- delete[] idx_buf;
-#ifdef _OPENMP
- delete[] ans_buf;
-#endif
- delete lcp_writer;
- std::fclose(f_text);
- free(left_halfsegment);
- free(right_halfsegment);
-
- // Print summary.
- long double total_time = utils::wclock() - start;
- fprintf(stderr, " Total time: %.2Lfs, total I/O vol = %.2Lfn\n",
- total_time, (1.L * total_io_volume) / text_length);
-
- return sum_irreducible_lcps;
-}
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_PROCESS_HALFSEGMENT_PAIRS_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/set_bits.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/set_bits.hpp
deleted file mode 100644
index 1168c5dd..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/set_bits.hpp
+++ /dev/null
@@ -1,219 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/set_bits.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_SET_BITS_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_SET_BITS_HPP_INCLUDED
-
-#include <cstdint>
-#include <vector>
-#include <algorithm>
-#include <omp.h>
-
-
-namespace em_succinct_irreducible_private {
-
-void set_bits(std::uint64_t *bv, std::uint64_t *tab, std::uint64_t tab_size) {
- for (std::uint64_t i = 0; i < tab_size; ++i) {
- std::uint64_t idx = tab[i];
- bv[idx >> 6] |= (1UL << (idx & 63));
- }
-}
-
-#ifdef _OPENMP
-template<typename int_type>
-void permute_into_small_buckets(int_type *tab,
- int_type *temp, std::uint64_t length,
- std::uint64_t lower_bound, std::uint64_t upper_bound,
- std::uint64_t max_bucket_size,
- std::vector<std::uint64_t> &output_bucket_sizes) {
- // Move all items into temp array.
- #pragma omp parallel for
- for (std::uint64_t j = 0; j < length; ++j)
- temp[j] = tab[j];
-
- // Compute bucket range. Note that bucket range is understood
- // as the length of ranges of keys assigned to a bucket;
- // bucket size is the number of items inside the bucket.
- static const std::uint64_t max_buckets = 1024;
- std::uint64_t value_range = upper_bound - lower_bound;
- std::uint64_t bucket_range_log = 6;
- std::uint64_t bucket_range = 64;
- while ((value_range + bucket_range - 1) / bucket_range > max_buckets) {
- ++bucket_range_log;
- bucket_range <<= 1;
- }
- std::uint64_t n_buckets = (value_range + bucket_range - 1) / bucket_range;
-
- // Allocate bucket counts.
- std::uint64_t max_threads = omp_get_max_threads();
- std::uint64_t max_range_size = (length + max_threads - 1) / max_threads;
- std::uint64_t n_threads = (length + max_range_size - 1) / max_range_size;
- std::uint64_t **bucket_ptr = new std::uint64_t*[n_threads];
- for (std::uint64_t thread_id = 0; thread_id < n_threads; ++thread_id)
- bucket_ptr[thread_id] = new std::uint64_t[n_buckets];
-
- // Permute items into buckets.
- #pragma omp parallel num_threads(n_threads)
- {
- std::uint64_t thread_id = omp_get_thread_num();
- std::uint64_t range_beg = thread_id * max_range_size;
- std::uint64_t range_end = std::min(range_beg + max_range_size, length);
- std::uint64_t *local_bucket_ptr = bucket_ptr[thread_id];
- std::fill(local_bucket_ptr, local_bucket_ptr + n_buckets, 0UL);
-
- // Compute bucket counts.
- for (std::uint64_t j = range_beg; j < range_end; ++j) {
- std::uint64_t bucket_id = (((std::uint64_t)tab[j] - lower_bound) >> bucket_range_log);
- ++local_bucket_ptr[bucket_id];
- }
-
- // Compute destination pointers.
- #pragma omp barrier
- #pragma omp single
- {
- std::uint64_t total_buckets_size = 0;
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) {
- std::uint64_t this_bucket_size = 0;
- for (std::uint64_t i = 0; i < n_threads; ++i) {
- std::uint64_t local_bucket_size = bucket_ptr[i][bucket_id];
- bucket_ptr[i][bucket_id] = total_buckets_size + this_bucket_size;
- this_bucket_size += local_bucket_size;
- }
- total_buckets_size += this_bucket_size;
- }
- }
-
- // Move items into buckets.
- for (std::uint64_t j = range_beg; j < range_end; ++j) {
- std::uint64_t bucket_id = ((temp[j] - lower_bound) >> bucket_range_log);
- std::uint64_t dest_pos = local_bucket_ptr[bucket_id]++;
- tab[dest_pos] = temp[j];
- }
- }
-
- // Free the memory for bucket_ptr. Keep only bucket sizes.
- std::vector<std::uint64_t> unrefined_bucket_sizes(n_buckets);
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) {
- unrefined_bucket_sizes[bucket_id] = bucket_ptr[n_threads - 1][bucket_id];
- if (bucket_id > 0)
- unrefined_bucket_sizes[bucket_id] -= bucket_ptr[n_threads - 1][bucket_id - 1];
- }
- for (std::uint64_t thread_id = 0; thread_id < n_threads; ++thread_id)
- delete[] bucket_ptr[thread_id];
- delete[] bucket_ptr;
-
- // Compute the output. If necessary, refine large buckets recursively.
- std::uint64_t cur_bucket_beg = 0;
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) {
- if (unrefined_bucket_sizes[bucket_id] > max_bucket_size) {
- std::uint64_t lower_bound_rec = lower_bound + bucket_id * bucket_range;
- std::uint64_t upper_bound_rec = std::min(lower_bound_rec + bucket_range, upper_bound);
- permute_into_small_buckets(tab + cur_bucket_beg, temp, unrefined_bucket_sizes[bucket_id],
- lower_bound_rec, upper_bound_rec, max_bucket_size, output_bucket_sizes);
- } else output_bucket_sizes.push_back(unrefined_bucket_sizes[bucket_id]);
- cur_bucket_beg += unrefined_bucket_sizes[bucket_id];
- }
-}
-
-template<typename int_type>
-void set_bits(std::uint64_t *bv,
- std::uint64_t bv_size, int_type *tab,
- std::uint64_t tab_size, int_type *temp) {
- std::uint64_t max_threads = omp_get_max_threads();
-
- // Partition the input array into buckets.
- std::vector<std::uint64_t> bucket_sizes;
- {
- // First, partition the input array into small buckets. There may be
- // a lot of them, so they need to be merged into larger buckets.
- std::vector<std::uint64_t> small_bucket_sizes;
- std::uint64_t ideal_bucket_size = std::max(512UL, (tab_size + max_threads - 1) / max_threads);
- std::uint64_t max_bucket_size = 2UL * ideal_bucket_size;
- permute_into_small_buckets(tab, temp, tab_size, 0,
- bv_size, max_bucket_size, small_bucket_sizes);
-
- // Merge small buckets into at most max_threads final buckets.
- std::uint64_t n_small_buckets = small_bucket_sizes.size();
- std::uint64_t small_bucket_ptr = 0;
- for (std::uint64_t bucket_id = 0; bucket_id < max_threads; ++bucket_id) {
- if (small_bucket_ptr < n_small_buckets) {
- std::uint64_t cur_bucket_total_size = small_bucket_sizes[small_bucket_ptr++];
- std::uint64_t cur_bucket_range_end = small_bucket_ptr;
-
- // Keep adding buckets as long as we are
- // getting closer to the ideal bucket size.
- while (cur_bucket_range_end < n_small_buckets && (std::abs((std::int64_t)(cur_bucket_total_size +
- small_bucket_sizes[cur_bucket_range_end]) - (std::int64_t)ideal_bucket_size) <=
- std::abs((std::int64_t)cur_bucket_total_size - (std::int64_t)ideal_bucket_size) ||
- (bucket_id + 1 == max_threads)))
- cur_bucket_total_size += small_bucket_sizes[cur_bucket_range_end++];
-
- // Add the final bucket to the list.
- bucket_sizes.push_back(cur_bucket_total_size);
- small_bucket_ptr = cur_bucket_range_end;
- }
- }
- }
-
- // Update the bits in bv. The above partitioning guarantees that
- // no thread will attempt to update the same word in bv and
- // that all threads will update roughly the same amount of bits.
- // Lastly, the above guarantees bucket_sizes.size() <= max_threads.
- {
- // Partial (exclusive) sum over bucket_sizes.
- std::uint64_t total_bucket_size = 0;
- std::uint64_t n_buckets = bucket_sizes.size();
- for (std::uint64_t bucket_id = 0; bucket_id < n_buckets; ++bucket_id) {
- std::uint64_t this_bucket_size = bucket_sizes[bucket_id];
- bucket_sizes[bucket_id] = total_bucket_size;
- total_bucket_size += this_bucket_size;
- }
-
- // Set the bits in the bitvector.
- #pragma omp parallel num_threads(n_buckets)
- {
- std::uint64_t bucket_id = omp_get_thread_num();
- std::uint64_t bucket_beg = bucket_sizes[bucket_id];
- std::uint64_t bucket_end = (bucket_id + 1 == n_buckets) ? total_bucket_size : bucket_sizes[bucket_id + 1];
- for (std::uint64_t j = bucket_beg; j < bucket_end; ++j) {
- std::uint64_t bv_idx = tab[j];
- bv[bv_idx >> 6] |= (1UL << (bv_idx & 63));
- }
- }
- }
-}
-#endif
-
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_SET_BITS_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.cpp
deleted file mode 100644
index 35e57e0d..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/utils.cpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstdint>
-#include <cstring>
-#include <cerrno>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <string>
-#include <fstream>
-#include <algorithm>
-
-#include "utils.hpp"
-
-
-namespace em_succinct_irreducible_private {
-namespace utils {
-
-long double wclock() {
- timeval tim;
- gettimeofday(&tim, NULL);
- return tim.tv_sec + (tim.tv_usec / 1000000.0L);
-}
-
-std::FILE *file_open(std::string filename, std::string mode) {
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str());
- if (f == NULL) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- return f;
-}
-
-std::FILE *file_open_nobuf(std::string filename, std::string mode) {
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str());
- if (f == NULL) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- if(std::setvbuf(f, NULL, _IONBF, 0) != 0) {
- perror("setvbuf failed");
- std::exit(EXIT_FAILURE);
- }
- return f;
-}
-
-std::uint64_t file_size(std::string filename) {
- std::FILE *f = file_open_nobuf(filename, "r");
- std::fseek(f, 0, SEEK_END);
- long size = std::ftell(f);
- if (size < 0) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- std::fclose(f);
- return (std::uint64_t)size;
-}
-
-bool file_exists(std::string filename) {
- std::FILE *f = std::fopen(filename.c_str(), "r");
- bool result = (f != NULL);
- if (f != NULL)
- std::fclose(f);
- return result;
-}
-
-void file_delete(std::string filename) {
- int res = std::remove(filename.c_str());
- if (res != 0) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
-}
-
-std::string absolute_path(std::string filename) {
- char path[1 << 12];
- bool created = false;
- if (!file_exists(filename)) {
- std::fclose(file_open(filename, "w"));
- created = true;
- }
- if (!realpath(filename.c_str(), path)) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- if (created)
- file_delete(filename);
- return std::string(path);
-}
-
-void drop_disk_pages(std::string filename) {
- int fd = open(filename.c_str(), O_RDWR);
- if (fd == -1) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- off_t length = lseek(fd, 0, SEEK_END);
- lseek(fd, 0L, SEEK_SET);
- posix_fadvise(fd, 0, length, POSIX_FADV_DONTNEED);
- close(fd);
-}
-
-std::int32_t random_int32(std::int32_t p, std::int32_t r) {
- return p + rand() % (r - p + 1);
-}
-
-std::int64_t random_int64(std::int64_t p, std::int64_t r) {
- std::int64_t x = random_int32(0, 1000000000);
- std::int64_t y = random_int32(0, 1000000000);
- std::int64_t z = x * 1000000000L + y;
- return p + z % (r - p + 1);
-}
-
-void fill_random_string(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma) {
- for (std::uint64_t i = 0; i < length; ++i)
- s[i] = random_int32(0, sigma - 1);
-}
-
-void fill_random_letters(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma) {
- fill_random_string(s, length, sigma);
- for (std::uint64_t i = 0; i < length; ++i)
- s[i] += 'a';
-}
-
-std::string random_string_hash() {
- uint64_t hash = (uint64_t)rand() * RAND_MAX + rand();
- std::stringstream ss;
- ss << hash;
- return ss.str();
-}
-
-std::uint64_t log2ceil(std::uint64_t x) {
- std::uint64_t pow2 = 1, w = 0;
- while (pow2 < x) { pow2 <<= 1; ++w; }
- return w;
-}
-
-std::uint64_t log2floor(std::uint64_t x) {
- std::uint64_t pow2 = 1, w = 0;
- while ((pow2 << 1) <= x) { pow2 <<= 1; ++w; }
- return w;
-}
-
-} // namespace utils
-} // namespace em_succinct_irreducible_private
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.hpp
deleted file mode 100644
index 4429713e..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/em_succinct_irreducible_src/utils.hpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/**
- * @file em_succinct_irreducible_src/utils.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __EM_SUCCINCT_IRREDUCIBLE_SRC_UTILS_HPP_INCLUDED
-#define __EM_SUCCINCT_IRREDUCIBLE_SRC_UTILS_HPP_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstdint>
-#include <string>
-#include <sstream>
-
-
-namespace em_succinct_irreducible_private {
-namespace utils {
-
-long double wclock();
-
-std::FILE *file_open(std::string fname, std::string mode);
-std::FILE *file_open_nobuf(std::string fname, std::string mode);
-std::uint64_t file_size(std::string fname);
-bool file_exists(std::string fname);
-void file_delete(std::string fname);
-std::string absolute_path(std::string fname);
-void drop_disk_pages(std::string filename);
-
-template<typename value_type>
-void write_to_file(const value_type *src, std::uint64_t length, std::FILE *f) {
- std::uint64_t fwrite_ret = std::fwrite(src, sizeof(value_type), length, f);
- if (fwrite_ret != length) {
- fprintf(stderr, "\nError: fwrite failed.\n");
- std::exit(EXIT_FAILURE);
- }
-}
-
-template<typename value_type>
-void write_to_file(const value_type *src, std::uint64_t length, std::string fname) {
- std::FILE *f = file_open_nobuf(fname, "w");
- write_to_file(src, length, f);
- std::fclose(f);
-}
-
-template<typename value_type>
-void read_from_file(value_type* dest, std::uint64_t length, std::FILE *f) {
- std::uint64_t fread_ret = std::fread(dest, sizeof(value_type), length, f);
- if (fread_ret != length) {
- fprintf(stderr, "\nError: fread failed.\n");
- std::exit(EXIT_FAILURE);
- }
-}
-
-template<typename value_type>
-void read_from_file(value_type* dest, std::uint64_t length, std::string fname) {
- std::FILE *f = file_open_nobuf(fname, "r");
- read_from_file<value_type>(dest, length, f);
- std::fclose(f);
-}
-
-template<typename value_type>
-void read_at_offset(value_type *dest, std::uint64_t offset,
- std::uint64_t length, std::FILE *f) {
- std::fseek(f, sizeof(value_type) * offset, SEEK_SET);
- read_from_file(dest, length, f);
-}
-
-template<typename value_type>
-void read_at_offset(value_type *dest, std::uint64_t offset,
- std::uint64_t length, std::string filename) {
- std::FILE *f = file_open_nobuf(filename, "r");
- read_at_offset(dest, offset, length, f);
- std::fclose(f);
-}
-
-std::int32_t random_int32(std::int32_t p, std::int32_t r);
-std::int64_t random_int64(std::int64_t p, std::int64_t r);
-void fill_random_string(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma);
-void fill_random_letters(std::uint8_t* &s, std::uint64_t length, std::uint64_t sigma);
-std::string random_string_hash();
-
-std::uint64_t log2ceil(std::uint64_t x);
-std::uint64_t log2floor(std::uint64_t x);
-
-template<typename int_type>
-std::string intToStr(int_type x) {
- std::stringstream ss;
- ss << x;
- return ss.str();
-}
-
-} // namespace utils
-} // namespace em_succinct_irreducible_private
-
-#endif // __EM_SUCCINCT_IRREDUCIBLE_SRC_UTILS_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_array.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_array.cpp
deleted file mode 100644
index 7d4557f8..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_array.cpp
+++ /dev/null
@@ -1,236 +0,0 @@
-/**
- * @file main.cpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstdint>
-#include <ctime>
-#include <string>
-#include <getopt.h>
-#include <unistd.h>
-
-#include "uint40.hpp"
-#include "uint48.hpp"
-#include "em_succinct_irreducible_src/compute_lcp_array.hpp"
-
-char *program_name;
-
-void usage(int status) {
- printf(
-
-"Usage: %s [OPTION]... FILE\n"
-"Construct the LCP array for text stored in FILE.\n"
-"\n"
-"Mandatory arguments to long options are mandatory for short options too.\n"
-" -b, --bwt=BWTFILE specify the location of the Burrows-Wheeler\n"
-" transform of FILE (default: FILE.bwt)\n"
-" -h, --help display this help and exit\n"
-" -i, --intsize=SIZE use integers of SIZE bytes (default: 5). Currently\n"
-" supported values are 4, 5, 6, and 8\n"
-" -m, --mem=MEM use MEM MiB of RAM for computation (default: 3584)\n"
-" -o, --output=OUTFILE specify output filename (default: FILE.lcpX, where\n"
-" X = integer size, see the -i flag)\n"
-" -s, --sa=SUFARRAY specify the location of the suffix array of FILE\n"
-" (default: FILE.saX, X = integer size, see -i flag)\n",
- program_name);
-
- std::exit(status);
-}
-
-bool file_exists(std::string filename) {
- std::FILE *f = std::fopen(filename.c_str(), "r");
- bool ret = (f != NULL);
- if (f != NULL) std::fclose(f);
-
- return ret;
-}
-
-std::FILE *file_open(std::string filename, std::string mode) {
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str());
- if (f == NULL) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- return f;
-}
-
-std::uint64_t file_size(std::string filename) {
- std::FILE *f = file_open(filename, "r");
- std::fseek(f, 0, SEEK_END);
- long size = std::ftell(f);
- if (size < 0) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- std::fclose(f);
- return (std::uint64_t)size;
-}
-
-template<typename int_type>
-std::string intToStr(int_type x) {
- std::stringstream ss;
- ss << x;
- return ss.str();
-}
-
-template<typename text_offset_type>
-void compute_lcp_array(std::string text_filename, std::string sa_filename,
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) {
- std::uint64_t text_length = file_size(text_filename);
- if (2UL * text_length <= std::numeric_limits<text_offset_type>::max()) {
- em_succinct_irreducible_private::compute_lcp_array<text_offset_type, text_offset_type>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- } else {
- if (sizeof(text_offset_type) < 4) em_succinct_irreducible_private::compute_lcp_array<text_offset_type, std::uint32_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- else if (sizeof(text_offset_type) == 4) em_succinct_irreducible_private::compute_lcp_array<text_offset_type, uint40>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- else if (sizeof(text_offset_type) == 5) em_succinct_irreducible_private::compute_lcp_array<text_offset_type, uint48>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- else em_succinct_irreducible_private::compute_lcp_array<text_offset_type, std::uint64_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- }
-}
-
-int main(int argc, char **argv) {
- srand(time(0) + getpid());
- program_name = argv[0];
-
- static struct option long_options[] = {
- {"bwt", required_argument, NULL, 'b'},
- {"help", no_argument, NULL, 'h'},
- {"intsize", required_argument, NULL, 'i'},
- {"mem", required_argument, NULL, 'm'},
- {"output", required_argument, NULL, 'o'},
- {"sa", required_argument, NULL, 's'},
- {NULL, 0, NULL, 0}
- };
-
- std::uint64_t int_size = 5;
- std::uint64_t ram_use = 3584UL << 20;
- std::string out_filename("");
- std::string sa_filename("");
- std::string bwt_filename("");
-
- // Parse command-line options.
- int c;
- while ((c = getopt_long(argc, argv, "b:hi:m:o:s:", long_options, NULL)) != -1) {
- switch(c) {
- case 'b':
- bwt_filename = std::string(optarg);
- break;
- case 'h':
- usage(EXIT_FAILURE);
- case 'i':
- int_size = std::atol(optarg);
- if (!(int_size == 4 || int_size == 5 || int_size == 6 || int_size == 8)) {
- fprintf(stderr, "Error: invalid int size (%lu)\n\n", int_size);
- usage(EXIT_FAILURE);
- }
- break;
- case 'm':
- ram_use = std::atol(optarg) << 20;
- if (ram_use == 0) {
- fprintf(stderr, "Error: invalid RAM limit (%lu)\n\n", ram_use);
- usage(EXIT_FAILURE);
- }
- break;
- case 'o':
- out_filename = std::string(optarg);
- break;
- case 's':
- sa_filename = std::string(optarg);
- break;
- default:
- usage(EXIT_FAILURE);
- }
- }
-
- if (optind >= argc) {
- fprintf(stderr, "Error: FILE not provided\n\n");
- usage(EXIT_FAILURE);
- }
-
- // Parse the text filename.
- std::string text_filename = std::string(argv[optind++]);
- if (optind < argc) {
- fprintf(stderr, "Warning: multiple input files provided. "
- "Only the first will be processed.\n");
- }
-
- // Set default filenames (if not provided).
- if (sa_filename.empty()) sa_filename = text_filename + ".sa" + intToStr(int_size);
- if (out_filename.empty()) out_filename = text_filename + ".lcp" + intToStr(int_size);
- if (bwt_filename.empty()) bwt_filename = text_filename + ".bwt";
-
- // Check if input text, suffix array, and BWT exist.
- if (!file_exists(text_filename)) {
- fprintf(stderr, "Error: input file (%s) does not exist\n\n",
- text_filename.c_str());
- usage(EXIT_FAILURE);
- }
- if (!file_exists(sa_filename)) {
- fprintf(stderr, "Error: suffix array (%s) does not exist\n\n",
- sa_filename.c_str());
- usage(EXIT_FAILURE);
- }
- if (!file_exists(bwt_filename)) {
- fprintf(stderr, "Error: BWT of input text (%s) does not exist\n\n",
- bwt_filename.c_str());
- usage(EXIT_FAILURE);
- }
-
- if (file_exists(out_filename)) {
- // Output file exists, should we proceed?
- char *line = NULL;
- std::uint64_t buflen = 0;
- std::int64_t len = 0L;
-
- do {
- printf("Output file (%s) exists. Overwrite? [y/n]: ",
- out_filename.c_str());
- if ((len = getline(&line, &buflen, stdin)) == -1) {
- printf("\nError: failed to read answer\n\n");
- std::fflush(stdout);
- usage(EXIT_FAILURE);
- }
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n'));
-
- if (line[0] == 'n') {
- free(line);
- std::exit(EXIT_FAILURE);
- }
- free(line);
- }
-
- // Run the algorithm.
- if (int_size == 4) compute_lcp_array<std::uint32_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
- else if (int_size == 5) compute_lcp_array<uint40>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
- else if (int_size == 6) compute_lcp_array<uint48>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
- else compute_lcp_array<std::uint64_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
-}
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_from_plcp.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_from_plcp.cpp
deleted file mode 100644
index c3cab4b1..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_lcp_from_plcp.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-/**
- * @file main.cpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstdint>
-#include <ctime>
-#include <string>
-#include <getopt.h>
-#include <unistd.h>
-
-#include "uint40.hpp"
-#include "uint48.hpp"
-#include "em_succinct_irreducible_src/compute_lcp_from_plcp.hpp"
-
-char *program_name;
-
-void usage(int status) {
- printf(
-
-"Usage: %s [OPTION]... FILE\n"
-"Convert PLCP array (bitvector representation) stored in FILE to LCP array.\n"
-"\n"
-"Mandatory arguments to long options are mandatory for short options too.\n"
-" -h, --help display this help and exit\n"
-" -i, --intsize=SIZE use integers of SIZE bytes (default: 5). Currently\n"
-" supported values are 4, 5, 6, and 8\n"
-" -m, --mem=MEM use MEM MiB of RAM for computation (default: 3584)\n"
-" -o, --output=OUTFILE specify output filename (default: FILE.lcpX, where\n"
-" X = integer size, see the -i flag)\n"
-" -s, --sa=SUFARRAY specify the location of the suffix array of FILE\n"
-" (default: FILE.saX, X = integer size, see -i flag)\n",
- program_name);
-
- std::exit(status);
-}
-
-bool file_exists(std::string filename) {
- std::FILE *f = std::fopen(filename.c_str(), "r");
- bool ret = (f != NULL);
- if (f != NULL) std::fclose(f);
-
- return ret;
-}
-
-template<typename int_type>
-std::string intToStr(int_type x) {
- std::stringstream ss;
- ss << x;
- return ss.str();
-}
-
-int main(int argc, char **argv) {
- srand(time(0) + getpid());
- program_name = argv[0];
-
- static struct option long_options[] = {
- {"help", no_argument, NULL, 'h'},
- {"intsize", required_argument, NULL, 'i'},
- {"mem", required_argument, NULL, 'm'},
- {"output", required_argument, NULL, 'o'},
- {"sa", required_argument, NULL, 's'},
- {NULL, 0, NULL, 0}
- };
-
- std::uint64_t int_size = 5;
- std::uint64_t ram_use = 3584UL << 20;
- std::string out_filename("");
- std::string sa_filename("");
-
- // Parse command-line options.
- int c;
- while ((c = getopt_long(argc, argv, "hi:m:o:s:", long_options, NULL)) != -1) {
- switch(c) {
- case 'h':
- usage(EXIT_FAILURE);
- case 'i':
- int_size = std::atol(optarg);
- if (!(int_size == 4 || int_size == 5 || int_size == 6 || int_size == 8)) {
- fprintf(stderr, "Error: invalid int size (%lu)\n\n", int_size);
- usage(EXIT_FAILURE);
- }
- break;
- case 'm':
- ram_use = std::atol(optarg) << 20;
- if (ram_use == 0) {
- fprintf(stderr, "Error: invalid RAM limit (%lu)\n\n", ram_use);
- usage(EXIT_FAILURE);
- }
- break;
- case 'o':
- out_filename = std::string(optarg);
- break;
- case 's':
- sa_filename = std::string(optarg);
- break;
- default:
- usage(EXIT_FAILURE);
- }
- }
-
- if (optind >= argc) {
- fprintf(stderr, "Error: FILE not provided\n\n");
- usage(EXIT_FAILURE);
- }
-
- // Parse the text filename.
- std::string input_filename = std::string(argv[optind++]);
- if (optind < argc) {
- fprintf(stderr, "Warning: multiple input files provided. "
- "Only the first will be processed.\n");
- }
-
- // Set default filenames (if not provided).
- if (sa_filename.empty()) sa_filename = input_filename + ".sa" + intToStr(int_size);
- if (out_filename.empty()) out_filename = input_filename + ".lcp" + intToStr(int_size);
-
- // Check if input text, suffix array, and BWT exist.
- if (!file_exists(input_filename)) {
- fprintf(stderr, "Error: input file (%s) does not exist\n\n",
- input_filename.c_str());
- usage(EXIT_FAILURE);
- }
- if (!file_exists(sa_filename)) {
- fprintf(stderr, "Error: suffix array (%s) does not exist\n\n",
- sa_filename.c_str());
- usage(EXIT_FAILURE);
- }
-
- if (file_exists(out_filename)) {
- // Output file exists, should we proceed?
- char *line = NULL;
- std::uint64_t buflen = 0;
- std::int64_t len = 0L;
-
- do {
- printf("Output file (%s) exists. Overwrite? [y/n]: ",
- out_filename.c_str());
- if ((len = getline(&line, &buflen, stdin)) == -1) {
- printf("\nError: failed to read answer\n\n");
- std::fflush(stdout);
- usage(EXIT_FAILURE);
- }
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n'));
-
- if (line[0] == 'n') {
- free(line);
- std::exit(EXIT_FAILURE);
- }
- free(line);
- }
-
- // Run the algorithm.
- if (int_size == 4) em_succinct_irreducible_private::compute_lcp_from_plcp<std::uint32_t>(input_filename, sa_filename, out_filename, ram_use);
- else if (int_size == 5) em_succinct_irreducible_private::compute_lcp_from_plcp<uint40>(input_filename, sa_filename, out_filename, ram_use);
- else if (int_size == 6) em_succinct_irreducible_private::compute_lcp_from_plcp<uint48>(input_filename, sa_filename, out_filename, ram_use);
- else em_succinct_irreducible_private::compute_lcp_from_plcp<std::uint64_t>(input_filename, sa_filename, out_filename, ram_use);
-}
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_plcp_bitvector.cpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_plcp_bitvector.cpp
deleted file mode 100644
index cafbb51d..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/main_construct_plcp_bitvector.cpp
+++ /dev/null
@@ -1,235 +0,0 @@
-/**
- * @file main.cpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstdint>
-#include <ctime>
-#include <string>
-#include <getopt.h>
-#include <unistd.h>
-
-#include "uint40.hpp"
-#include "uint48.hpp"
-#include "em_succinct_irreducible_src/compute_plcp_bitvector.hpp"
-
-char *program_name;
-
-void usage(int status) {
- printf(
-
-"Usage: %s [OPTION]... FILE\n"
-"Construct the PLCP array (bitvector representation) for text stored in FILE.\n"
-"\n"
-"Mandatory arguments to long options are mandatory for short options too.\n"
-" -b, --bwt=BWTFILE specify the location of the Burrows-Wheeler\n"
-" transform of FILE (default: FILE.bwt)\n"
-" -h, --help display this help and exit\n"
-" -i, --intsize=SIZE use integers of SIZE bytes (default: 5). Currently\n"
-" supported values are 4, 5, 6, and 8\n"
-" -m, --mem=MEM use MEM MiB of RAM for computation (default: 3584)\n"
-" -o, --output=OUTFILE specify output filename (default: FILE.plcp)\n"
-" -s, --sa=SUFARRAY specify the location of the suffix array of FILE\n"
-" (default: FILE.saX, X = integer size, see -i flag)\n",
- program_name);
-
- std::exit(status);
-}
-
-bool file_exists(std::string filename) {
- std::FILE *f = std::fopen(filename.c_str(), "r");
- bool ret = (f != NULL);
- if (f != NULL) std::fclose(f);
-
- return ret;
-}
-
-std::FILE *file_open(std::string filename, std::string mode) {
- std::FILE *f = std::fopen(filename.c_str(), mode.c_str());
- if (f == NULL) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- return f;
-}
-
-std::uint64_t file_size(std::string filename) {
- std::FILE *f = file_open(filename, "r");
- std::fseek(f, 0, SEEK_END);
- long size = std::ftell(f);
- if (size < 0) {
- std::perror(filename.c_str());
- std::exit(EXIT_FAILURE);
- }
- std::fclose(f);
- return (std::uint64_t)size;
-}
-
-template<typename int_type>
-std::string intToStr(int_type x) {
- std::stringstream ss;
- ss << x;
- return ss.str();
-}
-
-template<typename text_offset_type>
-void compute_plcp_bitvector(std::string text_filename, std::string sa_filename,
- std::string bwt_filename, std::string output_filename, std::uint64_t ram_use) {
- std::uint64_t text_length = file_size(text_filename);
- if (2UL * text_length <= std::numeric_limits<text_offset_type>::max()) {
- em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, text_offset_type>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- } else {
- if (sizeof(text_offset_type) < 4) em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, std::uint32_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- else if (sizeof(text_offset_type) == 4) em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, uint40>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- else if (sizeof(text_offset_type) == 5) em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, uint48>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- else em_succinct_irreducible_private::compute_plcp_bitvector<text_offset_type, std::uint64_t>(text_filename, sa_filename, bwt_filename, output_filename, ram_use);
- }
-}
-
-int main(int argc, char **argv) {
- srand(time(0) + getpid());
- program_name = argv[0];
-
- static struct option long_options[] = {
- {"bwt", required_argument, NULL, 'b'},
- {"help", no_argument, NULL, 'h'},
- {"intsize", required_argument, NULL, 'i'},
- {"mem", required_argument, NULL, 'm'},
- {"output", required_argument, NULL, 'o'},
- {"sa", required_argument, NULL, 's'},
- {NULL, 0, NULL, 0}
- };
-
- std::uint64_t int_size = 5;
- std::uint64_t ram_use = 3584UL << 20;
- std::string out_filename("");
- std::string sa_filename("");
- std::string bwt_filename("");
-
- // Parse command-line options.
- int c;
- while ((c = getopt_long(argc, argv, "b:hi:m:o:s:", long_options, NULL)) != -1) {
- switch(c) {
- case 'b':
- bwt_filename = std::string(optarg);
- break;
- case 'h':
- usage(EXIT_FAILURE);
- case 'i':
- int_size = std::atol(optarg);
- if (!(int_size == 4 || int_size == 5 || int_size == 6 || int_size == 8)) {
- fprintf(stderr, "Error: invalid int size (%lu)\n\n", int_size);
- usage(EXIT_FAILURE);
- }
- break;
- case 'm':
- ram_use = std::atol(optarg) << 20;
- if (ram_use == 0) {
- fprintf(stderr, "Error: invalid RAM limit (%lu)\n\n", ram_use);
- usage(EXIT_FAILURE);
- }
- break;
- case 'o':
- out_filename = std::string(optarg);
- break;
- case 's':
- sa_filename = std::string(optarg);
- break;
- default:
- usage(EXIT_FAILURE);
- }
- }
-
- if (optind >= argc) {
- fprintf(stderr, "Error: FILE not provided\n\n");
- usage(EXIT_FAILURE);
- }
-
- // Parse the text filename.
- std::string text_filename = std::string(argv[optind++]);
- if (optind < argc) {
- fprintf(stderr, "Warning: multiple input files provided. "
- "Only the first will be processed.\n");
- }
-
- // Set default filenames (if not provided).
- if (sa_filename.empty()) sa_filename = text_filename + ".sa" + intToStr(int_size);
- if (out_filename.empty()) out_filename = text_filename + ".plcp";
- if (bwt_filename.empty()) bwt_filename = text_filename + ".bwt";
-
- // Check if input text, suffix array, and BWT exist.
- if (!file_exists(text_filename)) {
- fprintf(stderr, "Error: input file (%s) does not exist\n\n",
- text_filename.c_str());
- usage(EXIT_FAILURE);
- }
- if (!file_exists(sa_filename)) {
- fprintf(stderr, "Error: suffix array (%s) does not exist\n\n",
- sa_filename.c_str());
- usage(EXIT_FAILURE);
- }
- if (!file_exists(bwt_filename)) {
- fprintf(stderr, "Error: BWT of input text (%s) does not exist\n\n",
- bwt_filename.c_str());
- usage(EXIT_FAILURE);
- }
-
- if (file_exists(out_filename)) {
- // Output file exists, should we proceed?
- char *line = NULL;
- std::uint64_t buflen = 0;
- std::int64_t len = 0L;
-
- do {
- printf("Output file (%s) exists. Overwrite? [y/n]: ",
- out_filename.c_str());
- if ((len = getline(&line, &buflen, stdin)) == -1) {
- printf("\nError: failed to read answer\n\n");
- std::fflush(stdout);
- usage(EXIT_FAILURE);
- }
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n'));
-
- if (line[0] == 'n') {
- free(line);
- std::exit(EXIT_FAILURE);
- }
- free(line);
- }
-
- // Run the algorithm.
- if (int_size == 4) compute_plcp_bitvector<std::uint32_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
- else if (int_size == 5) compute_plcp_bitvector<uint40>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
- else if (int_size == 6) compute_plcp_bitvector<uint48>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
- else compute_plcp_bitvector<std::uint64_t>(text_filename, sa_filename, bwt_filename, out_filename, ram_use);
-}
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint40.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/uint40.hpp
deleted file mode 100644
index a2cbe22c..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint40.hpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * @file uint40.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __UINT40_HPP_INCLUDED
-#define __UINT40_HPP_INCLUDED
-
-#include <cstdint>
-#include <limits>
-
-
-class uint40 {
- private:
- std::uint32_t low;
- std::uint8_t high;
-
- public:
- uint40() {}
- uint40(std::uint32_t l, std::uint8_t h) : low(l), high(h) {}
- uint40(const uint40& a) : low(a.low), high(a.high) {}
- uint40(const std::int32_t& a) : low(a), high(0) {}
- uint40(const std::uint32_t& a) : low(a), high(0) {}
- uint40(const std::uint64_t& a) : low(a & 0xFFFFFFFF), high((a >> 32) & 0xFF) {}
- uint40(const std::int64_t& a) : low(a & 0xFFFFFFFFL), high((a >> 32) & 0xFF) {}
-
- inline operator uint64_t() const { return (((std::uint64_t)high) << 32) | (std::uint64_t)low; }
- inline bool operator == (const uint40& b) const { return (low == b.low) && (high == b.high); }
- inline bool operator != (const uint40& b) const { return (low != b.low) || (high != b.high); }
-} __attribute__((packed));
-
-namespace std {
-
-template<>
-class numeric_limits<uint40> {
- public:
- static uint40 min() {
- return uint40(std::numeric_limits<std::uint32_t>::min(),
- std::numeric_limits<std::uint8_t>::min());
- }
-
- static uint40 max() {
- return uint40(std::numeric_limits<std::uint32_t>::max(),
- std::numeric_limits<std::uint8_t>::max());
- }
-};
-
-} // namespace std
-
-#endif // __UINT40_HPP_INCLUDED
diff --git a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint48.hpp b/exttools/EM-SuccinctIrreducible-0.1.0/src/uint48.hpp
deleted file mode 100644
index a6f6ab31..00000000
--- a/exttools/EM-SuccinctIrreducible-0.1.0/src/uint48.hpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * @file uint48.hpp
- * @section LICENCE
- *
- * This file is part of EM-SuccinctIrreducible v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2016
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __UINT48_HPP_INCLUDED
-#define __UINT48_HPP_INCLUDED
-
-#include <cstdint>
-#include <limits>
-
-
-class uint48 {
- private:
- std::uint32_t low;
- std::uint16_t high;
-
- public:
- uint48() {}
- uint48(std::uint32_t l, std::uint16_t h) : low(l), high(h) {}
- uint48(const uint48& a) : low(a.low), high(a.high) {}
- uint48(const std::int32_t& a) : low(a), high(0) {}
- uint48(const std::uint32_t& a) : low(a), high(0) {}
- uint48(const std::uint64_t& a) : low(a & 0xFFFFFFFF), high((a >> 32) & 0xFFFF) {}
- uint48(const std::int64_t& a) : low(a & 0xFFFFFFFFL), high((a >> 32) & 0xFFFF) {}
-
- inline operator uint64_t() const { return (((std::uint64_t)high) << 32) | (std::uint64_t)low; }
- inline bool operator == (const uint48& b) const { return (low == b.low) && (high == b.high); }
- inline bool operator != (const uint48& b) const { return (low != b.low) || (high != b.high); }
-} __attribute__((packed));
-
-namespace std {
-
-template<>
-class numeric_limits<uint48> {
- public:
- static uint48 min() {
- return uint48(std::numeric_limits<std::uint32_t>::min(),
- std::numeric_limits<std::uint16_t>::min());
- }
-
- static uint48 max() {
- return uint48(std::numeric_limits<std::uint32_t>::max(),
- std::numeric_limits<std::uint16_t>::max());
- }
-};
-
-} // namespace std
-
-#endif // __UINT48_HPP_INCLUDED
diff --git a/exttools/do.sh b/exttools/do.sh
deleted file mode 100755
index 02e1fe90..00000000
--- a/exttools/do.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-# $1 = text input
-# add the zero byte to the text input, save as $1.0
-stxxltools/build/standardize "${1}"
-# compute SA, store as $1.0.sa5
-/bighome/workspace/pSAscan-0.1.0/src/psascan "${1}.0"
-# compute ISA and BWT
-/bighome/workspace/stxxltools/build/isaandbwt "${1}.0"
-# compute PLCP
-/bighome/workspace/EM-SuccinctIrreducible-0.1.0/src/construct_plcp_sequential "${1}.0"
-
diff --git a/exttools/pSAscan-0.1.0/AUTHORS b/exttools/pSAscan-0.1.0/AUTHORS
deleted file mode 100644
index af53cfd7..00000000
--- a/exttools/pSAscan-0.1.0/AUTHORS
+++ /dev/null
@@ -1,2 +0,0 @@
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
-Dominik Kempa (contact person) <dominik.kempa (at) gmail.com>
diff --git a/exttools/pSAscan-0.1.0/LICENCE b/exttools/pSAscan-0.1.0/LICENCE
deleted file mode 100644
index 10333c04..00000000
--- a/exttools/pSAscan-0.1.0/LICENCE
+++ /dev/null
@@ -1,24 +0,0 @@
-Copyright (C) 2014-2015
-Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
-Dominik Kempa <dominik.kempa (at) gmail.com>
-
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following
-conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
diff --git a/exttools/pSAscan-0.1.0/README b/exttools/pSAscan-0.1.0/README
deleted file mode 100644
index b1c6509f..00000000
--- a/exttools/pSAscan-0.1.0/README
+++ /dev/null
@@ -1,206 +0,0 @@
-pSAscan - parallel external memory suffix array construction algorithm.
-=======================================================================
-
-
-Description
------------
-
-This package contains implementation of the parallel external-memory
-suffix array construction algorithm called pSAscan described in the paper
-
- Juha Karkkainen, Dominik Kempa, and Simon J. Puglisi,
- Parallel External Memory Suffix Sorting.
- In Proc. 26th Annual Symposium on Combinatorial Pattern Matching (CPM) 2015.
-
-The algorithm is based on the sequential external-memory suffix array
-construction algorithm called SAscan described in
-
- Juha Karkkainen and Dominik Kempa,
- Engineering a Lightweight External Memory Suffix Array Construction Algorithm.
- In Proc. 2nd International Conference on Algorithms for Big Data (ICABD) 2014.
-
-The latest version of SAscan/pSAscan is available at:
- http://www.cs.helsinki.fi/group/pads/
-
-
-
-Compilation and usage
----------------------
-
-1. Download http://libdivsufsort.googlecode.com/files/libdivsufsort-2.0.1.tar.gz
- and install. Make sure to compile libdivsufsort to static 64-bit libraries,
- i.e. set options in the main CMakeLists.txt to
-
- option(BUILD_SHARED_LIBS "Set to OFF to build static libraries" OFF)
- option(BUILD_DIVSUFSORT64 "Build libdivsufsort64" ON)
-
-2. Compile pSAscan using the provided Makefile
-
- $ cd src
- $ make
-
-This will produce the executable 'psascan' that allows computing the suffix
-array of a given file. For usage, run the 'psascan' program without any
-arguments.
-
-Example
-~~~~~~~
-
-To compute the suffix array of a file in.txt located in /data01/ using 8GiB
-of RAM run the 'psascan' command (assuming you are in the src/ directory) as:
-
- $ ./psascan /data01/in.txt -m 8192
-
-By default, the resulting suffix array is written to a file matching the
-filename of the input text with the .sa5 extension (/data01/in.txt.sa5
-in this case). To write the suffix array to a different file, use the
--o flag, e.g.,
-
- $ ./psascan /data01/in.txt -m 8192 -o /data02/in.txt.suf
-
-The current implementation encodes the output suffix array using unsigned
-40-bit integers. For further processing of the suffix array, one should use
-the same or compatible encoding. The class implementing the unsigned 40-bit
-integers is located in the src/psascan_src/uint40.h file.
-
-
-
-Disk space requirements
------------------------
-
-To compute the suffix array of an n-byte input text, pSAscan needs about
-7.5n bytes of disk space. This includes the input (n bytes) and output
-(5n bytes).
-
-In the default mode, the 'psascan' program assumes, that there is 6.5n bytes
-of free disk space available in the location used as the destination for the
-suffix array. This space is used for auxiliary files created during the
-computation and to accommodate the output.
-
-The above disk space requirement may in some cases prohibit the use of
-algorithm, e.g., if there is enough space (5n) on one physical disk to hold
-the suffix array, but not enough (6.5n) to run the algorithm. To still
-allow the computation in such cases, the 'psascan' program implements the
--g flag. With this flag, one can force pSAscan to use disk space from two
-physically different locations (e.g., on two disks).
-
-More precisely, out of 6.5n bytes of disk space used by pSAscan, about n
-bytes is used to store the so-called "gap array". By default, the gap array
-is stored along with the suffix array. The -g flag allows explicitly
-specifying the location of the gap array. This way, it suffices that there
-is only 5.5n bytes of disk space in the location specified as the destination
-of the suffix array. The remaining n bytes can be allocated in other location
-specified with the -g flag.
-
-Example
-~~~~~~~
-
-Assume the location of input/output files and RAM usage as in the example
-from the previous section. To additionally specify the location of the gap
-array as /data03/in.txt.gap run the 'psascan' command as:
-
- $ ./psascan /data01/in.txt -m 8192 -o /data02/in.txt.suf -g /data03/in.txt.gap
-
-
-
-RAM requirements
-----------------
-
-The algorithm does not have a fixed memory requirements. In principle, it
-can run with any amount of RAM (though there is some minimal per-thread
-amount necessary in the streaming phase). However, since the time complexity
-(without logarithmic factors) of the algorithm is O(n^2 / M), where M is the
-amount of RAM used in the computation, using more RAM decreases the runtime.
-Thus, the best performance is achieved when nearly all unused RAM available
-in the system (as shown by the Linux 'free' command) is used for the
-computation. Leaving about 5% (but not more than 2GiB) of RAM free is
-advised to prevent thrashing.
-
-Example
-~~~~~~~
-
-On a machine with 12 physical cores and Hyper-Threading (and thus capable
-of simultaneously running 24 threads) it takes about a week to compute a
-suffix array of a 200GiB file using 3.5GiB of RAM. Using 120GiB of RAM
-reduces the time to less than 12 hours.
-
-
-
-Troubleshooting
----------------
-
-1. I am getting "Error: the limit on the maximum number of open files
- is too small (...)".
-
-Solution: The error is caused by the operating system imposing a limit
-on the maximum number of files opened by a program. The limit (in Linux
-referred to as the soft limit) can be increased with the "ulimit -n newlimit"
-command. However, in Linux the soft limit cannot be increased beyond the
-so-called "hard limit", which is usually only few times larger than the
-soft limit. Furthermore, this is a temporary solution that needs to repeated
-every time a new session is started. To increase the limits permanently,
-edit (as a root) the file /etc/security/limits.conf and add the following
-lines at the end (including the asterisks):
-
-* soft nofile 128000
-* hard nofile 128000
-
-This increases the limit to 128000 (use larger values if necessary). The
-new limits apply (check with ulimit -n) after starting a new session.
-
-2. Program stops without any error message.
-
-Solution: Most likely the problem occurred during internal-memory sorting.
-Re-running the program with -v flag should show the error message.
-
-
-
-Limitations / known issues
---------------------------
-
-1. The maximum size of input text is 1TiB (2^40 bytes).
-2. The current implementation supports only inputs over byte alphabet.
-3. Only texts not containing bytes with value 255 are handled correctly.
- The 255-bytes can be removed from the input text using the tool located
- in the directory tools/delete-bytes-255/ of this package.
-4. The current internal-memory suffix sorting algorithm used internally
- in pSAscan works only if the input text is split into segments of
- size at most 2GiB each. Therefore, pSAscan will fail, if the memory
- budget X for the computation (specified with the -m flag) satisfies
- X / p > 10 * 2^31, where p is the number of threads used during
- the computation. On most systems, this is not a severe limitation,
- e.g., for a regular 4-core machine supporting Hyper-Threading (and
- thus capable of simultaneously running 8 threads), pSAscan can utilize
- up to 160GiB of RAM.
-
-The above limitations (except possibly 2) are not inherent to the algorithm
-but rather the current implementation. Future releases will most likely
-overcome these limitations.
-
-
-
-Third-party code
-----------------
-
-The pSAscan implementation makes use of some third-party code, in particular:
- - the uint40 class was copied (and slightly modified) from the eSAIS-0.5.2
- algorithm (https://panthema.net/2012/1119-eSAIS-Inducing-Suffix-and-
- LCP-Arrays-in-External-Memory/)
- - pSAscan uses the libdivsufsort-2.0.1 algorithm as the internal
- suffix-sorting routine (https://code.google.com/p/libdivsufsort/)
-
-
-
-Terms of use
-------------
-
-pSAscan is released under the MIT/X11 license. See the file LICENCE for
-more details.
-
-If you use this code, please cite the paper mentioned above and publish
-the URL from which you downloaded the code.
-
-
-
-Helsinki, June 2015.
-Written by Dominik Kempa <dominik.kempa (at) gmail.com>
diff --git a/exttools/pSAscan-0.1.0/VERSION b/exttools/pSAscan-0.1.0/VERSION
deleted file mode 100644
index 6e8bf73a..00000000
--- a/exttools/pSAscan-0.1.0/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.1.0
diff --git a/exttools/pSAscan-0.1.0/src/Makefile b/exttools/pSAscan-0.1.0/src/Makefile
deleted file mode 100644
index b53e1d35..00000000
--- a/exttools/pSAscan-0.1.0/src/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-LIB_DIR = /home/niki/opt/lib
-INC_DIR = /home/niki/opt/include
-SHELL = /bin/sh
-CC = g++
-CFLAGS = -Wall -Wextra -pedantic -Wshadow -funroll-loops -pthread -std=c++0x -DNDEBUG -O3 -march=native -I$(INC_DIR) -L$(LIB_DIR)
-
-all: psascan
-psascan: main.cpp
- $(CC) $(CFLAGS) -o psascan ./psascan_src/utils.cpp main.cpp -ldivsufsort -ldivsufsort64 -fopenmp
-clean:
- /bin/rm -f *.o
-nuclear:
- /bin/rm -f psascan *.o
diff --git a/exttools/pSAscan-0.1.0/src/main.cpp b/exttools/pSAscan-0.1.0/src/main.cpp
deleted file mode 100644
index 8a70e1a2..00000000
--- a/exttools/pSAscan-0.1.0/src/main.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/**
- * @file src/main.cpp
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#include <cstdio>
-#include <cstdlib>
-#include <ctime>
-#include <string>
-#include <getopt.h>
-#include <unistd.h>
-#include <omp.h>
-
-#include "psascan_src/psascan.h"
-
-
-char *program_name;
-
-void usage(int status) {
- printf(
-"Usage: %s [OPTION]... FILE\n"
-"Construct the suffix array for text stored in FILE.\n"
-"\n"
-"Mandatory arguments to long options are mandatory for short options too.\n"
-" -g, --gap=GAPFILE specify the file holding the gap array (default:\n"
-" FILE.sa5.gap)\n"
-" -h, --help display this help and exit\n"
-" -m, --mem=LIMIT limit RAM usage to LIMIT MiB (default: 3072)\n"
-" -o, --output=OUTFILE specify the output file (default: FILE.sa5)\n"
-" -v, --verbose print detailed information during internal sufsort\n",
- program_name);
-
- std::exit(status);
-}
-
-bool file_exists(std::string fname) {
- std::FILE *f = std::fopen(fname.c_str(), "r");
- bool ret = (f != NULL);
- if (f != NULL) std::fclose(f);
-
- return ret;
-}
-
-int main(int argc, char **argv) {
- srand(time(0) + getpid());
- program_name = argv[0];
- bool verbose = false;
-
- static struct option long_options[] = {
- {"help", no_argument, NULL, 'h'},
- {"verbose", no_argument, NULL, 'v'},
- {"mem", required_argument, NULL, 'm'},
- {"output", required_argument, NULL, 'o'},
- {"gap", required_argument, NULL, 'g'},
- {NULL, 0, NULL, 0}
- };
-
- long ram_use = 3072L << 20;
- std::string out_fname("");
- std::string gap_fname("");
-
- // Parse command-line options.
- int c;
- while ((c = getopt_long(argc, argv, "hvm:o:g:", long_options, NULL)) != -1) {
- switch(c) {
- case 'm':
- ram_use = std::atol(optarg) << 20;
- if (ram_use <= 0L) {
- fprintf(stderr, "Error: invalid RAM limit (%ld)\n\n", ram_use);
- usage(EXIT_FAILURE);
- }
- break;
- case 'o':
- out_fname = std::string(optarg);
- break;
- case 'g':
- gap_fname = std::string(optarg);
- break;
- case 'v':
- verbose = true;
- break;
- case 'h':
- usage(EXIT_FAILURE);
- default:
- usage(EXIT_FAILURE);
- }
- }
-
- if (optind >= argc) {
- fprintf(stderr, "Error: FILE not provided\n\n");
- usage(EXIT_FAILURE);
- }
-
- // Parse the text filename.
- std::string text_fname = std::string(argv[optind++]);
- if (optind < argc) {
- fprintf(stderr, "Warning: multiple input files provided. "
- "Only the first will be processed.\n");
- }
-
- // Set default output filename (if not provided).
- if (out_fname.empty())
- out_fname = text_fname + ".sa5";
-
- // Set default gap filename (if not provided).
- if (gap_fname.empty())
- gap_fname = out_fname;
-
- // Check if input exists.
- if (!file_exists(text_fname)) {
- fprintf(stderr, "Error: input file (%s) does not exist\n\n",
- text_fname.c_str());
- usage(EXIT_FAILURE);
- }
-
- if (file_exists(out_fname)) {
- // Output file exists, should we proceed?
- char *line = NULL;
- size_t buflen = 0;
- long len = 0L;
-
- do {
- printf("Output file (%s) exists. Overwrite? [y/n]: ",
- out_fname.c_str());
- if ((len = getline(&line, &buflen, stdin)) == -1) {
- fprintf(stderr, "\nError: failed to read answer\n\n");
- usage(EXIT_FAILURE);
- }
- } while (len != 2 || (line[0] != 'y' && line[0] != 'n'));
-
- if (line[0] == 'n') {
- free(line);
- std::exit(EXIT_FAILURE);
- }
- free(line);
- }
-
- // Find the number of (logical) cores on the machine.
- long max_threads = (long)omp_get_max_threads();
-
- // Run pSAscan.
- pSAscan(text_fname, out_fname, gap_fname,
- ram_use, max_threads, verbose);
-}
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/approx_rank.h b/exttools/pSAscan-0.1.0/src/psascan_src/approx_rank.h
deleted file mode 100644
index 0a3fd2f0..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/approx_rank.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/**
- * @file src/psascan_src/approx_rank.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * The approximate rank data structure. Based on the 'sparse-LF'
- * data structure described in:
- *
- * Dominik Kempa, Simon J. Puglisi:
- * Lempel-Ziv Factorization: Simple, Fast, Practical.
- * In Proc. ALENEX 2013, p. 103-112.
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_APPROX_RANK_H_INCLUDED
-#define __PSASCAN_SRC_APPROX_RANK_H_INCLUDED
-
-#include <thread>
-#include <algorithm>
-
-
-namespace psascan_private {
-
-template<long k_sampling_rate_log>
-class approx_rank {
- private:
- long *m_list_size;
- long **m_list;
-
- static const long k_sampling_rate;
- static const long k_sampling_rate_mask;
-
- public:
- long *m_count;
-
- private:
- static void compute_symbol_count_aux(const unsigned char *text, long beg,
- long end, long *symbol_count) {
- for (long j = beg; j < end; ++j)
- ++symbol_count[text[j]];
- }
-
- static void compute_occ_list_aux(const unsigned char *text, long beg,
- long end, long *symbol_count, long **list) {
- // Compute where to start writing positions for each symbol.
- long *ptr = new long[256];
- for (long c = 0; c < 256; ++c)
- ptr[c] = (symbol_count[c] + k_sampling_rate - 1) / k_sampling_rate;
-
- // Add occurrences in the block to the lists.
- for (long j = beg; j < end; ++j) {
- unsigned char c = text[j];
- if (!((symbol_count[c]++) & k_sampling_rate_mask))
- list[c][ptr[c]++] = j;
- }
-
- // Clean up.
- delete[] ptr;
- }
-
- public:
- approx_rank(const unsigned char *text, long length, long max_threads) {
- // Compute symbol counts in each block.
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_threads = (length + max_block_size - 1) / max_block_size;
- long **symbol_count = new long*[n_threads];
- for (long j = 0; j < n_threads; ++j) {
- symbol_count[j] = new long[256];
- std::fill(symbol_count[j], symbol_count[j] + 256, 0L);
- }
-
- std::thread **threads = new std::thread*[n_threads];
- for (long t = 0; t < n_threads; ++t) {
- long block_beg = t * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
-
- threads[t] = new std::thread(compute_symbol_count_aux,
- text, block_beg, block_end, symbol_count[t]);
- }
-
- for (long t = 0; t < n_threads; ++t) threads[t]->join();
- for (long t = 0; t < n_threads; ++t) delete threads[t];
-
- // Compute (exclusive) partial sums over symbol counts.
- m_count = new long[256];
- std::fill(m_count, m_count + 256, 0L);
- long *temp_count = new long[256];
- for (long i = 0; i < n_threads; ++i) {
- std::copy(symbol_count[i], symbol_count[i] + 256, temp_count);
- std::copy(m_count, m_count + 256, symbol_count[i]);
- for (long j = 0; j < 256; ++j)
- m_count[j] += temp_count[j];
- }
- delete[] temp_count;
-
- // Compute sizes and allocate occurrences lists.
- m_list_size = new long[256];
- m_list = new long*[256];
- for (long i = 0; i < 256; ++i) {
- m_list_size[i] = (m_count[i] + k_sampling_rate - 1) / k_sampling_rate;
- if (m_list_size[i]) m_list[i] = new long[m_list_size[i]];
- else m_list[i] = NULL;
- }
-
- for (long t = 0; t < n_threads; ++t) {
- long block_beg = t * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
-
- threads[t] = new std::thread(compute_occ_list_aux, text,
- block_beg, block_end, symbol_count[t], m_list);
- }
-
- for (long t = 0; t < n_threads; ++t) threads[t]->join();
- for (long t = 0; t < n_threads; ++t) delete threads[t];
- delete[] threads;
-
-
- // Clean up.
- for (long j = 0; j < n_threads; ++j)
- delete[] symbol_count[j];
- delete[] symbol_count;
- }
-
- inline long rank(long i, unsigned char c) const {
- if (i <= 0 || (!m_list_size[c]) || m_list[c][0] >= i)
- return 0L;
-
- long left = 0, right = m_list_size[c];
- while (left + 1 != right) {
- // Invariant: the answer is in range [left..right).
- long mid = (left + right) / 2;
- if (m_list[c][mid] <= i) left = mid;
- else right = mid;
- }
- return (left << k_sampling_rate_log);
- }
-
- ~approx_rank() {
- delete[] m_count;
- delete[] m_list_size;
- for (long j = 0; j < 256; ++j) {
- if (m_list[j])
- delete[] m_list[j];
- }
- delete[] m_list;
- }
-};
-
-template<long k_sampling_rate_log>
-const long approx_rank<k_sampling_rate_log>::k_sampling_rate = (1L << k_sampling_rate_log);
-
-template<long k_sampling_rate_log>
-const long approx_rank<k_sampling_rate_log>::k_sampling_rate_mask = (1L << k_sampling_rate_log) - 1;
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_APPROX_RANK_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_backward_skip_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_backward_skip_stream_reader.h
deleted file mode 100644
index be3dadc0..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_backward_skip_stream_reader.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/**
- * @file src/psascan_src/async_backward_skip_stream_reader.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_ASYNC_BACKWARD_SKIP_STREAM_READER_H_INCLUDED
-#define __PSASCAN_SRC_ASYNC_BACKWARD_SKIP_STREAM_READER_H_INCLUDED
-
-#include <cstdio>
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-template<typename value_type>
-struct async_backward_skip_stream_reader {
- template<typename T>
- static void io_thread_code(async_backward_skip_stream_reader<T> *reader) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(reader->m_mutex);
- while (!(reader->m_avail) && !(reader->m_finished))
- reader->m_cv.wait(lk);
-
- if (!(reader->m_avail) && (reader->m_finished)) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- // Safely read the data from disk.
- long filepos = std::ftell(reader->m_file) / sizeof(T);
- long toread = std::min(reader->m_buf_size, filepos - reader->m_active_buf_filled);
- if (toread > 0) {
- std::fseek(reader->m_file, -((reader->m_active_buf_filled + toread) * sizeof(T)), SEEK_CUR);
- reader->m_passive_buf_filled = std::fread(reader->m_passive_buf, sizeof(T), toread, reader->m_file);
- }
-
- // Let the caller know that the I/O thread finished reading.
- lk.lock();
- reader->m_avail = false;
- lk.unlock();
- reader->m_cv.notify_one();
- }
- }
-
- async_backward_skip_stream_reader(std::string filename, long skip_elems, long bufsize = (4 << 20)) {
- m_file = utils::open_file(filename.c_str(), "r");
- std::fseek(m_file, -(skip_elems * sizeof(value_type)), SEEK_END);
-
-
- // Initialize buffers.
- long elems = std::max(2UL, (bufsize + sizeof(value_type) - 1) / sizeof(value_type));
- m_buf_size = elems / 2;
-
- m_active_buf_filled = 0L;
- m_passive_buf_filled = 0L;
- m_active_buf_pos = -1L;
- m_active_buf = (value_type *)malloc(m_buf_size * sizeof(value_type));
- m_passive_buf = (value_type *)malloc(m_buf_size * sizeof(value_type));
-
- m_finished = false;
-
- // Start the I/O thread and immediately start reading.
- m_avail = true;
- m_thread = new std::thread(io_thread_code<value_type>, this);
- }
-
- ~async_backward_skip_stream_reader() {
- // Let the I/O thread know that we're done.
- std::unique_lock<std::mutex> lk(m_mutex);
- m_finished = true;
- lk.unlock();
- m_cv.notify_one();
-
- // Wait for the thread to finish.
- m_thread->join();
-
- // Clean up.
- delete m_thread;
- free(m_active_buf);
- free(m_passive_buf);
- std::fclose(m_file);
- }
-
- // This function checks if the reading thread has already
- // prefetched the next buffer (the request should have been
- // issued before), and waits in case the prefetching was not
- // completed yet.
- void receive_new_buffer() {
- // Wait until the I/O thread finishes reading the previous
- // buffer. In most cases this step is instantaneous.
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_avail == true)
- m_cv.wait(lk);
-
- // Set the new active buffer.
- std::swap(m_active_buf, m_passive_buf);
- m_active_buf_filled = m_passive_buf_filled;
- m_active_buf_pos = m_active_buf_filled - 1L;
-
- // Let the I/O thread know that it can now prefetch
- // another buffer.
- m_avail = true;
- lk.unlock();
- m_cv.notify_one();
- }
-
- inline value_type read() {
- if (m_active_buf_pos < 0L) {
- // The active buffer run out of data.
- // At this point we need to swap it with the passive
- // buffer. The request to read that passive buffer should
- // have been scheduled long time ago, so hopefully the
- // buffer is now available. We check for that, but we
- // also might wait, if the reading has not yet been
- // finished. At this point we also already schedule
- // the next read.
- receive_new_buffer();
- }
-
- return m_active_buf[m_active_buf_pos--];
- }
-
-private:
- value_type *m_active_buf;
- value_type *m_passive_buf;
-
- long m_buf_size;
- long m_active_buf_pos;
- long m_active_buf_filled;
- long m_passive_buf_filled;
-
- // Used for synchronization with the I/O thread.
- std::mutex m_mutex;
- std::condition_variable m_cv;
- bool m_avail;
- bool m_finished;
-
- std::FILE *m_file;
- std::thread *m_thread;
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_ASYNC_BACKWARD_SKIP_STREAM_READER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_bit_stream_writer.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_bit_stream_writer.h
deleted file mode 100644
index 6627ad00..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_bit_stream_writer.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/**
- * @file src/psascan_src/async_bit_stream_writer.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_ASYNC_BIT_STREAM_WRITER_H_INCLUDED
-#define __PSASCAN_SRC_ASYNC_BIT_STREAM_WRITER_H_INCLUDED
-
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-struct async_bit_stream_writer {
- static void io_thread_code(async_bit_stream_writer *writer) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(writer->m_mutex);
- while (!(writer->m_avail) && !(writer->m_finished))
- writer->m_cv.wait(lk);
-
- if (!(writer->m_avail) && (writer->m_finished)) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- // Safely write the data to disk.
- utils::add_objects_to_file(writer->m_passive_buf,
- writer->m_passive_buf_filled, writer->m_file);
-
- // Let the caller know that the I/O thread finished writing.
- lk.lock();
- writer->m_avail = false;
- lk.unlock();
- writer->m_cv.notify_one();
- }
- }
-
- async_bit_stream_writer(std::string filename, long bufsize = (4 << 20)) {
- m_file = utils::open_file(filename.c_str(), "w");
-
- // Initialize buffers.
- long elems = std::max(2L, bufsize);
- m_buf_size = elems / 2; // both buffers are of the same size
-
- m_active_buf = (unsigned char *)malloc(m_buf_size);
- m_passive_buf = (unsigned char *)malloc(m_buf_size);
-
- m_active_buf[0] = 0;
- m_bit_pos = 0L;
- m_active_buf_filled = 0L;
- m_passive_buf_filled = 0L;
-
- m_avail = false;
- m_finished = false;
-
- // Start the I/O thread.
- m_thread = new std::thread(io_thread_code, this);
- }
-
- ~async_bit_stream_writer() {
- // Write the partially filled active buffer to disk.
- if (m_bit_pos != 0) ++m_active_buf_filled;
- if (m_active_buf_filled > 0L)
- send_active_buf_to_write();
-
- // Let the I/O thread know that we're done.
- std::unique_lock<std::mutex> lk(m_mutex);
- m_finished = true;
- lk.unlock();
- m_cv.notify_one();
-
- // Wait for the thread to finish.
- m_thread->join();
-
- // Clean up.
- delete m_thread;
- free(m_active_buf);
- free(m_passive_buf);
- std::fclose(m_file);
- }
-
- // Passes on the active buffer (full, unless it's the last one,
- // partially filled, buffer passed from destructor) to the I/O thread.
- void send_active_buf_to_write() {
- // Wait until the I/O thread finishes writing the previous buffer.
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_avail == true)
- m_cv.wait(lk);
-
- // Set the new passive buffer.
- std::swap(m_active_buf, m_passive_buf);
- m_passive_buf_filled = m_active_buf_filled;
- m_active_buf_filled = 0L;
- m_bit_pos = 0L;
- m_active_buf[0] = 0;
-
- // Let the I/O thread know that the buffer is waiting.
- m_avail = true;
- lk.unlock();
- m_cv.notify_one();
- }
-
- inline void write(unsigned char bit) {
- m_active_buf[m_active_buf_filled] |= (bit << m_bit_pos);
- ++m_bit_pos;
- if (m_bit_pos == 8) {
- m_bit_pos = 0;
- ++m_active_buf_filled;
-
- // If the active buffer was full, send it to I/O thread.
- // This function may wait a bit until the I/O thread
- // finishes writing the previous passive buffer.
- if (m_active_buf_filled == m_buf_size)
- send_active_buf_to_write();
-
- // Clear all bits in the current byte.
- m_active_buf[m_active_buf_filled] = 0;
- }
- }
-
-private:
- unsigned char *m_active_buf;
- unsigned char *m_passive_buf;
-
- long m_buf_size; // size of each of the buffers
- long m_bit_pos;
- long m_active_buf_filled;
- long m_passive_buf_filled;
-
- // Used for synchronization with the I/O thread.
- bool m_avail; // signals availability of buffer for I/O thread
- bool m_finished; // signals the end of writing
- std::mutex m_mutex;
- std::condition_variable m_cv;
-
- std::FILE *m_file;
- std::thread *m_thread;
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_multifile_bit_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_multifile_bit_stream_reader.h
deleted file mode 100644
index f9ee8cec..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_multifile_bit_stream_reader.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/**
- * @file src/psascan_src/async_multifile_bit_stream_reader.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_ASYNC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED
-#define __PSASCAN_SRC_ASYNC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED
-
-#include <thread>
-#include <mutex>
-#include <vector>
-#include <algorithm>
-#include <condition_variable>
-
-#include "utils.h"
-#include "multifile.h"
-
-
-namespace psascan_private {
-
-struct async_multifile_bit_stream_reader {
- async_multifile_bit_stream_reader(const multifile *m, long start_pos = 0L,
- long bufsize = (4L << 20)) {
- m_files_info = m->files_info;
-
- long items = std::max(2L, bufsize);
- m_buf_size = items / 2L;
-
- // Reset counters.
- m_active_buf_filled = 0;
- m_passive_buf_filled = 0;
- m_active_buf_pos = 0;
-
- // Initialize buffers.
- m_active_buf = (unsigned char *)malloc(m_buf_size);
- m_passive_buf = (unsigned char *)malloc(m_buf_size);
-
- // Initialize the reading.
- init(start_pos);
- }
-
- void init(long start_pos) {
- m_total_read_buf = start_pos;
-
- m_file = NULL;
- for (size_t j = 0; j < m_files_info.size(); ++j) {
- if (m_files_info[j].m_beg <= start_pos && start_pos < m_files_info[j].m_end) {
- m_file_id = j;
- m_file = utils::open_file(m_files_info[j].m_filename, "r");
- break;
- }
- }
-
- if (m_file != NULL) {
- long offset = start_pos - m_files_info[m_file_id].m_beg;
- std::fseek(m_file, offset >> 3, SEEK_SET);
-
- m_cur_byte = 0;
- m_cur_bit = (offset & 7L);
- m_active_buf_pos = m_cur_bit;
- m_total_read_buf -= m_cur_bit;
-
- long file_left = m_files_info[m_file_id].m_end - m_total_read_buf;
- m_active_buf_filled = std::min(file_left, 8L * m_buf_size);
- long toread_bytes = (m_active_buf_filled + 7L) / 8L;
- utils::read_n_objects_from_file(m_active_buf, toread_bytes, m_file);
- m_total_read_buf += m_active_buf_filled;
- if (m_total_read_buf == m_files_info[m_file_id].m_end) {
- std::fclose(m_file);
- m_file = NULL;
- }
- }
-
- m_avail = true;
- m_finished = false;
- m_thread = new std::thread(async_io_code, this);
- }
-
- inline bool read() {
- if (m_active_buf_pos == m_active_buf_filled)
- receive_new_buffer();
-
- bool result = (m_active_buf[m_cur_byte] & (1 << m_cur_bit));
- ++m_cur_bit;
- ++m_active_buf_pos;
- if (m_cur_bit == 8) {
- m_cur_bit = 0;
- ++m_cur_byte;
- }
-
- return result;
- }
-
- ~async_multifile_bit_stream_reader() {
- // Let the I/O thread know that we are done.
- std::unique_lock<std::mutex> lk(m_mutex);
- m_finished = true;
- lk.unlock();
- m_cv.notify_one();
-
- // Wait for the thread to finish.
- m_thread->join();
-
- // Clean up.
- delete m_thread;
- free(m_active_buf);
- free(m_passive_buf);
- if (m_file)
- std::fclose(m_file);
- }
-
- static void async_io_code(async_multifile_bit_stream_reader *file) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(file->m_mutex);
- while (!(file->m_avail) && !(file->m_finished))
- file->m_cv.wait(lk);
-
- if (!(file->m_avail) && (file->m_finished)) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- if (file->m_file == NULL) {
- // Find the next file to open.
- for (size_t j = 0; j < file->m_files_info.size(); ++j)
- if (file->m_files_info[j].m_beg == file->m_total_read_buf) {
- file->m_file_id = j;
- file->m_file = utils::open_file(file->m_files_info[j].m_filename, "r");
- break;
- }
- }
-
- // If file ID was found, we perform the read.
- // Otherwise there is no more data to prefetch.
- if (file->m_file != NULL) {
- long file_left = file->m_files_info[file->m_file_id].m_end - file->m_total_read_buf;
- file->m_passive_buf_filled = std::min(file_left, 8L * (file->m_buf_size));
- long toread_bytes = (file->m_passive_buf_filled + 7L) / 8L;
- utils::read_n_objects_from_file(file->m_passive_buf, toread_bytes, file->m_file);
- file->m_total_read_buf += file->m_passive_buf_filled;
- if (file->m_total_read_buf == file->m_files_info[file->m_file_id].m_end) {
- std::fclose(file->m_file);
- file->m_file = NULL;
- }
- }
-
- // Let the caller know that the I/O thread finished reading.
- lk.lock();
- file->m_avail = false;
- lk.unlock();
- file->m_cv.notify_one();
- }
- }
-
- void receive_new_buffer() {
- // Wait until the I/O thread finishes reading the previous
- // buffer. Most of the time this step is instantaneous.
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_avail == true)
- m_cv.wait(lk);
-
- // Set the new active buffer.
- std::swap(m_active_buf, m_passive_buf);
- m_active_buf_filled = m_passive_buf_filled;
- m_active_buf_pos = 0;
- m_cur_byte = 0;
- m_cur_bit = 0;
-
- // Let the I/O thread know that it can now
- // prefetch another buffer.
- m_avail = true;
- lk.unlock();
- m_cv.notify_one();
- }
-
-private:
- std::FILE *m_file; // file handler
- long m_total_read_buf; // total number of items read from files into buffers
- long m_file_id;
- std::vector<single_file_info> m_files_info;
-
- // Buffers used for asynchronous reading.
- unsigned char *m_active_buf;
- unsigned char *m_passive_buf;
- long m_buf_size;
- long m_active_buf_pos;
- long m_active_buf_filled;
- long m_passive_buf_filled;
-
- long m_cur_byte;
- long m_cur_bit;
-
- // For synchronization with thread doing asynchronous reading.
- std::thread *m_thread;
- std::mutex m_mutex;
- std::condition_variable m_cv;
- bool m_finished;
- bool m_avail;
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_ASYNC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_stream_writer.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_stream_writer.h
deleted file mode 100644
index 3f7deb54..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_stream_writer.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/**
- * @file src/psascan_src/async_stream_writer.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED
-#define __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED
-
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-template<typename value_type>
-struct async_stream_writer {
- template<typename T>
- static void io_thread_code(async_stream_writer<T> *writer) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(writer->m_mutex);
- while (!(writer->m_avail) && !(writer->m_finished))
- writer->m_cv.wait(lk);
-
- if (!(writer->m_avail) && (writer->m_finished)) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- // Safely write the data to disk.
- utils::add_objects_to_file(writer->m_passive_buf,
- writer->m_passive_buf_filled, writer->m_file);
-
- // Let the caller know that the I/O thread finished writing.
- lk.lock();
- writer->m_avail = false;
- lk.unlock();
- writer->m_cv.notify_one();
- }
- }
-
- async_stream_writer(std::string filename, long bufsize = (4 << 20)) {
- m_file = utils::open_file(filename.c_str(), "w");
-
- // Initialize buffers.
- long elems = std::max(2UL, (bufsize + sizeof(value_type) - 1) / sizeof(value_type));
- m_buf_size = elems / 2; // both buffers are of the same size
-
- m_active_buf_filled = 0L;
- m_passive_buf_filled = 0L;
-
- m_active_buf = (value_type *)malloc(m_buf_size * sizeof(value_type));
- m_passive_buf = (value_type *)malloc(m_buf_size * sizeof(value_type));
-
- m_avail = false;
- m_finished = false;
-
- // Start the I/O thread.
- m_thread = new std::thread(io_thread_code<value_type>, this);
- }
-
- ~async_stream_writer() {
- // Write the partially filled active buffer to disk.
- if (m_active_buf_filled > 0L)
- send_active_buf_to_write();
-
- // Let the I/O thread know that we're done.
- std::unique_lock<std::mutex> lk(m_mutex);
- m_finished = true;
- lk.unlock();
- m_cv.notify_one();
-
- // Wait for the thread to finish.
- m_thread->join();
-
- // Clean up.
- delete m_thread;
- free(m_active_buf);
- free(m_passive_buf);
- std::fclose(m_file);
- }
-
- // Passes on the active buffer (full, unless it's the last one,
- // partially filled, buffer passed from destructor) to the I/O thread.
- void send_active_buf_to_write() {
- // Wait until the I/O thread finishes writing the previous buffer.
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_avail == true)
- m_cv.wait(lk);
-
- // Set the new passive buffer.
- std::swap(m_active_buf, m_passive_buf);
- m_passive_buf_filled = m_active_buf_filled;
- m_active_buf_filled = 0L;
-
- // Let the I/O thread know that the buffer is waiting.
- m_avail = true;
- lk.unlock();
- m_cv.notify_one();
- }
-
- inline void write(value_type x) {
- m_active_buf[m_active_buf_filled++] = x;
-
- // If the active buffer was full, send it to I/O thread.
- // This function may wait a bit until the I/O thread
- // finishes writing the previous passive buffer.
- if (m_active_buf_filled == m_buf_size)
- send_active_buf_to_write();
- }
-
-private:
- value_type *m_active_buf;
- value_type *m_passive_buf;
-
- long m_buf_size; // size of each of the buffers
- long m_active_buf_filled;
- long m_passive_buf_filled;
-
- // Used for synchronization with the I/O thread.
- bool m_avail; // signals availability of buffer for I/O thread
- bool m_finished; // signals the end of writing
- std::mutex m_mutex;
- std::condition_variable m_cv;
-
- std::FILE *m_file;
- std::thread *m_thread;
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_ASYNC_STREAM_WRITER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/async_vbyte_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/async_vbyte_stream_reader.h
deleted file mode 100644
index 18bd0cb1..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/async_vbyte_stream_reader.h
+++ /dev/null
@@ -1,185 +0,0 @@
-/**
- * @file src/psascan_src/async_vbyte_stream_reader.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_ASYNC_VBYTE_STREAM_READER_H_INCLUDED
-#define __PSASCAN_SRC_ASYNC_VBYTE_STREAM_READER_H_INCLUDED
-
-#include <cstdio>
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-template<typename value_type>
-struct async_vbyte_stream_reader {
- static void io_thread_code(async_vbyte_stream_reader *reader) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(reader->m_mutex);
- while (!(reader->m_avail) && !(reader->m_finished))
- reader->m_cv.wait(lk);
-
- if (!(reader->m_avail) && (reader->m_finished)) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- // Safely read the data from disk.
- long count = std::fread(reader->m_passive_buf, 1, reader->m_buf_size + 128, reader->m_file);
- if (count > reader->m_buf_size) {
- reader->m_passive_buf_filled = reader->m_buf_size;
- std::fseek(reader->m_file, reader->m_buf_size - count, SEEK_CUR);
- } else reader->m_passive_buf_filled = count;
-
- // Let the caller know that the I/O thread finished reading.
- lk.lock();
- reader->m_avail = false;
- lk.unlock();
- reader->m_cv.notify_one();
- }
- }
-
- async_vbyte_stream_reader(std::string filename, long bufsize = (4L << 20)) {
- m_file = utils::open_file(filename.c_str(), "r");
-
- // Initialize buffers.
- long elems = std::max(4096L, bufsize);
- m_buf_size = elems / 2;
-
- m_active_buf_filled = 0L;
- m_passive_buf_filled = 0L;
- m_active_buf_pos = 0L;
- m_active_buf = (unsigned char *)malloc(m_buf_size + 128);
- m_passive_buf = (unsigned char *)malloc(m_buf_size + 128);
-
- m_finished = false;
-
- // Start the I/O thread and immediately start reading.
- m_avail = true;
- m_thread = new std::thread(io_thread_code, this);
- }
-
- ~async_vbyte_stream_reader() {
- // Let the I/O thread know that we're done.
- std::unique_lock<std::mutex> lk(m_mutex);
- m_finished = true;
- lk.unlock();
- m_cv.notify_one();
-
- // Wait for the thread to finish.
- m_thread->join();
-
- // Clean up.
- delete m_thread;
- free(m_active_buf);
- free(m_passive_buf);
- std::fclose(m_file);
- }
-
- // This function checks if the reading thread has already
- // prefetched the next buffer (the request should have been
- // issued before), and waits in case the prefetching was not
- // completed yet.
- void receive_new_buffer(long skipped_bytes) {
- // Wait until the I/O thread finishes reading the previous
- // buffer. In most cases, this step is instantaneous.
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_avail == true)
- m_cv.wait(lk);
-
- // Set the new active buffer.
- std::swap(m_active_buf, m_passive_buf);
- m_active_buf_filled = m_passive_buf_filled;
- m_active_buf_pos = skipped_bytes;
-
- // Let the I/O thread know that it can now prefetch
- // another buffer.
- m_avail = true;
- lk.unlock();
- m_cv.notify_one();
- }
-
- inline value_type read() {
- if (m_active_buf_pos >= m_active_buf_filled) {
- // The active buffer run out of data.
- // At this point we need to swap it with the passive
- // buffer. The request to read that passive buffer should
- // have been scheduled long time ago, so hopefully the
- // buffer is now available. We check for that, but we
- // also might wait, if the reading has not yet been finished.
- // At this point we also already schedule the next read.
- receive_new_buffer(m_active_buf_pos - m_active_buf_filled);
- }
-
- value_type result = 0L;
- long offset = 0L;
- while (m_active_buf[m_active_buf_pos] & 0x80) {
- result |= (((value_type)m_active_buf[m_active_buf_pos++] & 0x7F) << offset);
- offset += 7;
- }
- result |= ((value_type)m_active_buf[m_active_buf_pos++] << offset);
-
- return result;
- }
-
-private:
- unsigned char *m_active_buf;
- unsigned char *m_passive_buf;
-
- long m_buf_size;
- long m_active_buf_pos;
- long m_active_buf_filled;
- long m_passive_buf_filled;
-
- // Used for synchronization with the I/O thread.
- std::mutex m_mutex;
- std::condition_variable m_cv;
- bool m_avail;
- bool m_finished;
-
- std::FILE *m_file;
- std::thread *m_thread;
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_ASYNC_VBYTE_STREAM_READER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/background_block_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/background_block_reader.h
deleted file mode 100644
index 517bb68a..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/background_block_reader.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/**
- * @file src/psascan_src/background_block_reader.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_BACKGROUND_BLOCK_READER_H_INCLUDED
-#define __PSASCAN_SRC_BACKGROUND_BLOCK_READER_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <algorithm>
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-struct background_block_reader {
- public:
- unsigned char *m_data;
- long m_start;
- long m_size;
-
- private:
- static const long k_chunk_size;
-
- // These variables are protected by m_mutex.
- long m_fetched;
- bool m_signal_stop;
- bool m_joined;
-
- std::mutex m_mutex;
-
- // This condition variable is used by the I/O thread to notify
- // the waiting threads when the next chunk is read.
- std::condition_variable m_cv;
-
- std::thread *m_thread;
- std::FILE *m_file;
-
- private:
- static void io_thread_main(background_block_reader &reader) {
- while (true) {
- std::unique_lock<std::mutex> lk(reader.m_mutex);
- long fetched = reader.m_fetched;
- bool signal_stop = reader.m_signal_stop;
- lk.unlock();
-
- if (fetched == reader.m_size || signal_stop) break;
-
- long toread = std::min(reader.m_size - fetched, reader.k_chunk_size);
- unsigned char *dest = reader.m_data + fetched;
- utils::read_n_objects_from_file(dest, toread, reader.m_file);
-
- lk.lock();
- reader.m_fetched += toread;
- lk.unlock();
- reader.m_cv.notify_all();
- }
-
- // Close the file and exit.
- std::fclose(reader.m_file);
- }
-
- public:
- background_block_reader(std::string filename, long start, long size) {
- m_start = start;
- m_size = size;
-
- // Initialize file and buffer.
- m_data = (unsigned char *)malloc(m_size);
- m_file = utils::open_file(filename, "r");
- std::fseek(m_file, m_start, SEEK_SET);
- m_fetched = 0;
-
- // Start the I/O thread.
- m_signal_stop = false;
- m_joined = false;
- m_thread = new std::thread(io_thread_main, std::ref(*this));
- }
-
- ~background_block_reader() {
- if (!m_joined) {
- fprintf(stderr, "\nError: the I/O thread is still not joined when "
- "destroying an object of backgroud_block_reader.\n");
- std::exit(EXIT_FAILURE);
- }
-
- // Note: m_file is already closed.
- delete m_thread;
- free(m_data);
- }
-
- inline void stop() {
- // Set the flag for the thread to stop.
- std::unique_lock<std::mutex> lk(m_mutex);
- m_signal_stop = true;
- lk.unlock();
-
- // Wait until the thread notices the flag and exits. Possibly the thread
- // is already not running, but in this case this call will do nothing.
- m_thread->join();
-
- // To detect (in the destructor) if stop() was called.
- lk.lock();
- m_joined = true;
- lk.unlock();
- }
-
- inline void wait(long target_fetched) {
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_fetched < target_fetched)
- m_cv.wait(lk);
- lk.unlock();
- }
-};
-
-const long background_block_reader::k_chunk_size = (1L << 20);
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_BACKGROUND_BLOCK_READER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/background_chunk_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/background_chunk_reader.h
deleted file mode 100644
index 4e7fce44..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/background_chunk_reader.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * @file src/psascan_src/background_chunk_reader.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_BACKGROUND_CHUNK_READER_H_INCLUDED
-#define __PSASCAN_SRC_BACKGROUND_CHUNK_READER_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include <algorithm>
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-struct background_chunk_reader {
- private:
- std::FILE *m_file;
- long m_chunk_length;
- long m_end;
-
- std::condition_variable m_cv;
- std::mutex m_mutex;
- std::thread *m_thread;
-
- bool m_signal_read_next_chunk;
- bool m_signal_stop;
-
- long m_cur;
- unsigned char *m_passive_chunk;
-
- public:
- unsigned char *m_chunk;
-
- private:
- static void async_io_code(background_chunk_reader &r) {
- while (true) {
- std::unique_lock<std::mutex> lk(r.m_mutex);
- while (!r.m_signal_read_next_chunk && !r.m_signal_stop)
- r.m_cv.wait(lk);
-
- bool sig_stop = r.m_signal_stop;
- r.m_signal_read_next_chunk = false;
- lk.unlock();
-
- if (sig_stop) break;
-
- long next_chunk_length = std::min(r.m_chunk_length, r.m_end - r.m_cur);
- utils::read_n_objects_from_file(r.m_passive_chunk, next_chunk_length, r.m_file);
-
- lk.lock();
- r.m_cur += next_chunk_length;
- lk.unlock();
- r.m_cv.notify_all();
- }
- }
-
- public:
- background_chunk_reader(std::string filename, long beg,
- long end, long chunk_length = (1L << 20)) {
- if (beg > end) {
- fprintf(stderr, "Error: beg > end in background_chunk_reader.\n");
- std::exit(EXIT_FAILURE);
- }
-
- if (beg == end) return;
-
- m_cur = beg;
- m_end = end;
-
- m_chunk_length = chunk_length;
- m_chunk = (unsigned char *)malloc(m_chunk_length);
- m_passive_chunk = (unsigned char *)malloc(m_chunk_length);
-
- m_file = utils::open_file(filename, "r");
- std::fseek(m_file, m_cur, SEEK_SET);
-
- m_signal_stop = false;
- m_signal_read_next_chunk = true;
- m_thread = new std::thread(async_io_code, std::ref(*this));
- }
-
- inline void wait(long end) {
- if (end > m_end) {
- fprintf(stderr, "Error: end > m_end in background_chunk_reader.\n");
- std::exit(EXIT_FAILURE);
- }
-
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_cur != end)
- m_cv.wait(lk);
-
- if (m_signal_read_next_chunk) {
- fprintf(stderr, "Error: m_signal_read_next_chunk in the wrong state.\n");
- std::exit(EXIT_FAILURE);
- }
-
- std::swap(m_chunk, m_passive_chunk);
- m_signal_read_next_chunk = true;
-
- lk.unlock();
- m_cv.notify_all();
- }
-
- ~background_chunk_reader() {
- std::unique_lock<std::mutex> lk(m_mutex);
- m_signal_stop = true;
- lk.unlock();
- m_cv.notify_all();
-
- // Wait until the thread notices the flag and exits. Possibly the thread
- // is already not running, but in this case this call will do nothing.
- m_thread->join();
-
- std::fclose(m_file);
-
- // Clean up.
- delete m_thread;
- free(m_chunk);
- free(m_passive_chunk);
- }
-
- inline long get_chunk_size() const {
- return m_chunk_length;
- }
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_BACKGROUND_CHUNK_READER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/bitvector.h b/exttools/pSAscan-0.1.0/src/psascan_src/bitvector.h
deleted file mode 100644
index 11498a28..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/bitvector.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * @file src/psascan_src/bitvector.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_BITVECTOR_H_INCLUDED
-#define __PSASCAN_SRC_BITVECTOR_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <stdint.h>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-struct bitvector {
- private:
- long m_alloc_bytes;
- unsigned char *m_data;
-
- public:
- bitvector(std::string filename) {
- utils::read_objects_from_file<unsigned char>(m_data, m_alloc_bytes, filename);
- }
-
- bitvector(long length) {
- m_alloc_bytes = (length + 7) / 8;
- m_data = (unsigned char *)calloc(m_alloc_bytes, sizeof(unsigned char));
- }
-
- inline bool get(long i) const {
- return m_data[i >> 3] & (1 << (i & 7));
- }
-
- inline void set(long i) {
- m_data[i >> 3] |= (1 << (i & 7));
- }
-
- inline void reset(long i) {
- m_data[i >> 3] &= (~(1 << (i & 7)));
- }
-
- inline void flip(long i) {
- if (get(i)) reset(i);
- else set(i);
- }
-
- inline void save(std::string filename) const {
- utils::write_objects_to_file<unsigned char>(m_data, m_alloc_bytes, filename);
- }
-
- // Number of 1 bits in the range [beg..end).
- long range_sum(long beg, long end) const {
- long result = 0L;
-
- long j = beg;
- while (j < end && (j & 63))
- result += get(j++);
-
- uint64_t *ptr64 = (uint64_t *)(m_data + (j >> 3));
- while (j + 64 <= end) {
- result += __builtin_popcountll(*ptr64++);
- j += 64;
- }
-
- while (j < end)
- result += get(j++);
-
- return result;
- }
-
- ~bitvector() {
- free(m_data);
- }
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_BITVECTOR_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/bwt_merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/bwt_merge.h
deleted file mode 100644
index 437f2298..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/bwt_merge.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * @file src/psascan_src/bwt_merge.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_BWT_MERGE_H_INCLUDED
-#define __PSASCAN_SRC_BWT_MERGE_H_INCLUDED
-
-#include <thread>
-#include <algorithm>
-
-#include "bitvector.h"
-#include "ranksel_support.h"
-
-
-namespace psascan_private {
-
-//==============================================================================
-// Compute bwt[beg..end).
-//==============================================================================
-void merge_bwt_aux(long beg, long end, long left_ptr, long right_ptr,
- const unsigned char *left_bwt, const unsigned char *right_bwt, unsigned char *bwt,
- const bitvector *bv) {
- for (long i = beg; i < end; ++i) {
- if (bv->get(i)) bwt[i] = right_bwt[right_ptr++];
- else bwt[i] = left_bwt[left_ptr++];
- }
-}
-
-void compute_initial_rank(long i, const ranksel_support *ranksel, long &result) {
- result = ranksel->rank(i);
-}
-
-//==============================================================================
-// Merge partial bwt of half-blocks (of size left_size and right_size) into
-// partial bwt of the whole block.
-//==============================================================================
-long merge_bwt(const unsigned char *left_bwt, const unsigned char *right_bwt,
- long left_size, long right_size, long left_block_i0, long right_block_i0,
- unsigned char left_block_last, unsigned char *bwt, const bitvector *bv,
- long max_threads) {
- long block_size = left_size + right_size;
-
- // 1
- //
- // Initialize rank/select queries support for bv.
- ranksel_support *bv_ranksel = new ranksel_support(bv, block_size, max_threads);
-
- // 2
- //
- // Compute range size.
- long max_range_size = (block_size + max_threads - 1) / max_threads;
- long n_ranges = (block_size + max_range_size - 1) / max_range_size;
-
- // 3
- //
- // Compute starting parameters for each thread.
- long *left_ptr = new long[n_ranges];
- long *right_ptr = new long[n_ranges];
- long *rank_at_range_beg = new long[n_ranges];
-
- std::thread **threads = new std::thread*[n_ranges];
- for (long t = 0; t < n_ranges; ++t) {
- long range_beg = t * max_range_size;
- threads[t] = new std::thread(compute_initial_rank,
- range_beg, bv_ranksel, std::ref(rank_at_range_beg[t]));
- }
-
- for (long t = 0; t < n_ranges; ++t) threads[t]->join();
- for (long t = 0; t < n_ranges; ++t) delete threads[t];
-
- for (long t = 0; t < n_ranges; ++t) {
- long range_beg = t * max_range_size;
- left_ptr[t] = range_beg - rank_at_range_beg[t];
- right_ptr[t] = rank_at_range_beg[t];
- }
- delete[] rank_at_range_beg;
-
- // 4
- //
- // Merge BWTs in parallel.
- for (long t = 0; t < n_ranges; ++t) {
- long range_beg = max_range_size * t;
- long range_end = std::min(range_beg + max_range_size, block_size);
-
- threads[t] = new std::thread(merge_bwt_aux, range_beg, range_end,
- left_ptr[t], right_ptr[t], left_bwt, right_bwt, bwt, bv);
- }
-
- for (long t = 0; t < n_ranges; ++t) threads[t]->join();
- for (long t = 0; t < n_ranges; ++t) delete threads[t];
- delete[] threads;
- delete[] left_ptr;
- delete[] right_ptr;
-
- // 5
- //
- // Find position j = select_1(bv, right_block_i0) and replace bwt[j] with
- // left_block_last. To speed up the search for j, we use sparse_rank.
- bwt[bv_ranksel->select1(right_block_i0)] = left_block_last;
-
- // 6
- //
- // Compute the returned value.
- long block_i0 = bv_ranksel->select0(left_block_i0);
-
- // 7
- //
- // Clean up and exit.
- delete bv_ranksel;
- return block_i0;
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_BWT_MERGE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/compute_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/compute_gap.h
deleted file mode 100644
index 683f0ae5..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/compute_gap.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/**
- * @file src/psascan_src/compute_gap.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_COMPUTE_GAP_H_INCLUDED
-#define __PSASCAN_SRC_COMPUTE_GAP_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <string>
-#include <thread>
-#include <algorithm>
-#include <vector>
-
-#include "utils.h"
-#include "rank.h"
-#include "gap_array.h"
-#include "gap_buffer.h"
-#include "stream.h"
-#include "update.h"
-#include "stream_info.h"
-#include "multifile.h"
-
-
-namespace psascan_private {
-
-//==============================================================================
-// Compute the gap for an arbitrary range of suffixes of tail. This version is
-// more general, and can be used also when processing half-blocks.
-//==============================================================================
-template<typename block_offset_type>
-void compute_gap(const rank4n<> *rank, buffered_gap_array *gap,
- long tail_begin, long tail_end, long text_length, long max_threads,
- long block_isa0, long gap_buf_size, unsigned char block_last_symbol,
- std::vector<long> initial_ranks, std::string text_filename, std::string output_filename,
- const multifile *tail_gt_begin_rev, multifile *newtail_gt_begin_rev) {
- long tail_length = tail_end - tail_begin;
- long stream_max_block_size = (tail_length + max_threads - 1) / max_threads;
- long n_threads = (tail_length + stream_max_block_size - 1) / stream_max_block_size;
-
- fprintf(stderr, " Stream:");
- long double stream_start = utils::wclock();
-
- // 1
- //
- // Get symbol counts of a block and turn into exclusive partial sum.
- long *count = new long[256];
- std::copy(rank->m_count, rank->m_count + 256, count);
- ++count[block_last_symbol];
- --count[0];
- for (long j = 0, s = 0, t; j < 256; ++j) {
- t = count[j];
- count[j] = s;
- s += t;
- }
-
- // 2
- //
- // Allocate gap buffers.
- long n_gap_buffers = 2 * max_threads;
- gap_buffer<block_offset_type> **gap_buffers = new gap_buffer<block_offset_type>*[n_gap_buffers];
- for (long i = 0L; i < n_gap_buffers; ++i)
- gap_buffers[i] = new gap_buffer<block_offset_type>(gap_buf_size, max_threads);
-
- // 3
- //
- // Create poll of empty and full buffers.
- gap_buffer_poll<block_offset_type> *empty_gap_buffers = new gap_buffer_poll<block_offset_type>();
- gap_buffer_poll<block_offset_type> *full_gap_buffers = new gap_buffer_poll<block_offset_type>(n_threads);
-
- // 4
- //
- // Add all buffers to the poll of empty buffers.
- for (long i = 0L; i < n_gap_buffers; ++i)
- empty_gap_buffers->add(gap_buffers[i]);
-
- // 5
- //
- // Start threads doing the backward search.
- stream_info info(n_threads, tail_length);
- std::thread **streamers = new std::thread*[n_threads];
- std::vector<std::string> gt_filenames(n_threads);
-
- for (long t = 0L; t < n_threads; ++t) {
- long stream_block_beg = tail_begin + t * stream_max_block_size;
- long stream_block_end = std::min(stream_block_beg + stream_max_block_size, tail_end);
-
- gt_filenames[t] = output_filename + ".gt_tail." + utils::random_string_hash();
- newtail_gt_begin_rev->add_file(text_length - stream_block_end, text_length - stream_block_beg, gt_filenames[t]);
-
- streamers[t] = new std::thread(parallel_stream<block_offset_type>, full_gap_buffers, empty_gap_buffers, stream_block_beg,
- stream_block_end, initial_ranks[t], count, block_isa0, rank, block_last_symbol, text_filename, text_length,
- std::ref(gt_filenames[t]), &info, t, gap->m_length, gap_buf_size, tail_gt_begin_rev, max_threads);
- }
-
- // 6
- //
- // Start threads doing the gap array updates.
- std::thread *updater = new std::thread(gap_updater<block_offset_type>,
- full_gap_buffers, empty_gap_buffers, gap, max_threads);
-
- // 7
- //
- // Wait for all threads to finish.
- for (long i = 0L; i < n_threads; ++i) streamers[i]->join();
- updater->join();
-
- // 8
- //
- // Clean up.
- for (long i = 0L; i < n_threads; ++i) delete streamers[i];
- for (long i = 0L; i < n_gap_buffers; ++i) delete gap_buffers[i];
- delete updater;
- delete[] streamers;
- delete[] gap_buffers;
- delete empty_gap_buffers;
- delete full_gap_buffers;
- delete[] count;
-
- // 9
- //
- // Print summary and exit.
- long double stream_time = utils::wclock() - stream_start;
- long double speed = (tail_length / (1024.L * 1024)) / stream_time;
- fprintf(stderr,"\r Stream: 100.0%%. Time: %.2Lfs. Speed: %.2LfMiB/s\n",
- stream_time, speed);
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_COMPUTE_GAP_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/compute_left_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/compute_left_gap.h
deleted file mode 100644
index 4b07489e..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/compute_left_gap.h
+++ /dev/null
@@ -1,312 +0,0 @@
-/**
- * @file src/psascan_src/compute_left_gap.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_COMPUTE_LEFT_GAP_H_INCLUDED
-#define __PSASCAN_SRC_COMPUTE_LEFT_GAP_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "bitvector.h"
-#include "ranksel_support.h"
-#include "gap_array.h"
-#include "parallel_utils.h"
-
-
-namespace psascan_private {
-
-//==============================================================================
-// Compute the range_gap values corresponging to bv[part_beg..part_end).
-//==============================================================================
-void lblock_handle_bv_part(long part_beg, long part_end, long range_beg,
- long *range_gap, const gap_array_2n *block_gap, const bitvector *bv,
- const ranksel_support *bv_ranksel, long &res_sum, long &res_rank) {
- size_t excess_ptr = std::lower_bound(block_gap->m_excess.begin(),
- block_gap->m_excess.end(), part_beg) - block_gap->m_excess.begin();
-
- // Initialize j.
- long j = part_beg;
-
- // Compute gap[j].
- long gap_j = block_gap->m_count[j];
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) {
- ++excess_ptr;
- gap_j += (1L << 16);
- }
-
- // Initialize sum.
- long sum = gap_j + 1;
-
- while (j != part_end - 1 && bv->get(j) == 1) {
- // Update j.
- ++j;
-
- // Compute gap[j].
- gap_j = block_gap->m_count[j];
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) {
- ++excess_ptr;
- gap_j += (1L << 16);
- }
-
- // Update sum.
- sum += gap_j + 1;
- }
- if (bv->get(j) == 0) --sum;
-
- // Store gap[part_beg] + .. + gap[j] and bv.rank0(part_beg) (== bv.rank0(j)).
- res_sum = sum;
- res_rank = bv_ranksel->rank0(part_beg);
-
- if (j == part_end - 1)
- return;
-
- sum = 0L;
- long range_gap_ptr = res_rank + 1;
- while (j != part_end - 1) {
- // Update j.
- ++j;
-
- // Compute gap[j].
- gap_j = block_gap->m_count[j];
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) {
- ++excess_ptr;
- gap_j += (1L << 16);
- }
-
- // Update sum.
- sum += gap_j + 1;
-
- // Update range_gap.
- if (bv->get(j) == 0) {
- range_gap[range_gap_ptr - range_beg] = sum - 1;
- ++range_gap_ptr;
- sum = 0L;
- }
- }
-
- if (bv->get(j) == 1)
- range_gap[range_gap_ptr - range_beg] = sum;
-}
-
-
-void lblock_async_write_code(unsigned char* &slab, long &length, std::mutex &mtx,
- std::condition_variable &cv, bool &avail, bool &finished, std::string filename) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(mtx);
- while (!avail && !finished)
- cv.wait(lk);
-
- if (!avail && finished) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- // Safely write the data to disk.
- utils::add_objects_to_file(slab, length, filename);
-
- // Let the caller know that the I/O thread finished writing.
- lk.lock();
- avail = false;
- lk.unlock();
- cv.notify_one();
- }
-}
-
-
-//==============================================================================
-// Given the gap array of the block (representation using 2 bytes per elements)
-// and the gap array of the left half-block wrt right half-block (bitvector
-// representation), compute the gap array (wrt tail) of the left half-block
-// and write to a given file using v-byte encoding.
-//
-// The whole computation is performed under given ram budget. It is fully
-// parallelized and uses asynchronous I/O as much as possible.
-//==============================================================================
-void compute_left_gap(long left_block_size, long right_block_size,
- const gap_array_2n *block_gap, bitvector *bv, std::string out_filename,
- long max_threads, long ram_budget) {
- long block_size = left_block_size + right_block_size;
- long left_gap_size = left_block_size + 1;
-
- // NOTE: we require that bv has room for one extra bit at the end
- // which we use as a sentinel. The actual value of that bit
- // prior to calling this function does not matter.
- bv->reset(block_size);
- long bv_size = block_size + 1;
-
- fprintf(stderr, " Compute gap array for left half-block: ");
- long compute_gap_start = utils::wclock();
-
- //----------------------------------------------------------------------------
- // STEP 1: Preprocess left_block_gap_bv for rank and select queries,
- // i.e., compute sparse_gap.
- //----------------------------------------------------------------------------
- ranksel_support *bv_ranksel = new ranksel_support(bv, bv_size, max_threads);
-
-
- //----------------------------------------------------------------------------
- // STEP 2: compute the values of the right gap array, one range at a time.
- //----------------------------------------------------------------------------
- long max_range_size = std::max(1L, ram_budget / (3L * (long)sizeof(long)));
- long n_ranges = (left_gap_size + max_range_size - 1) / max_range_size;
-
- // To ensure that asynchronous I/O is really taking
- // place, we try to make 8 parts.
- if (n_ranges < 8L) {
- max_range_size = (left_gap_size + 7L) / 8L;
- n_ranges = (left_gap_size + max_range_size - 1) / max_range_size;
- }
-
- long *range_gap = (long *)malloc(max_range_size * sizeof(long));
- unsigned char *active_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long));
- unsigned char *passive_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long));
- long active_vbyte_slab_length;
- long passive_vbyte_slab_length;
-
- // Used for communication with thread doing asynchronous writes.
- std::mutex mtx;
- std::condition_variable cv;
- bool avail = false;
- bool finished = false;
-
- // Start the thread doing asynchronous writes.
- std::thread *async_writer = new std::thread(lblock_async_write_code,
- std::ref(passive_vbyte_slab), std::ref(passive_vbyte_slab_length),
- std::ref(mtx), std::ref(cv), std::ref(avail), std::ref(finished),
- out_filename);
-
- for (long range_id = 0L; range_id < n_ranges; ++range_id) {
- // Compute the range [range_beg..range_end) of values in the left gap
- // array (which is indexed [0..left_gap_size)).
- long range_beg = range_id * max_range_size;
- long range_end = std::min(range_beg + max_range_size, left_gap_size);
- long range_size = range_end - range_beg;
-
- // 2.a
- //
- // Find the section in the bitvector that contains
- // the bits necessary to compute the answer.
- long bv_section_beg = 0L;
- long bv_section_end = 0L;
- if (range_beg > 0)
- bv_section_beg = bv_ranksel->select0(range_beg - 1) + 1;
- bv_section_end = bv_ranksel->select0(range_end - 1) + 1;
- long bv_section_size = bv_section_end - bv_section_beg;
-
- // Split the current bitvector section into
- // equal parts. Each thread handles one part.
- long max_part_size = (bv_section_size + max_threads - 1) / max_threads;
- long n_parts = (bv_section_size + max_part_size - 1) / max_part_size;
-
- parallel_utils::parallel_fill<long>(range_gap, range_size, 0L, max_threads);
-
- // Allocate arrays used to store the answers for part boundaries.
- long *res_sum = new long[n_parts];
- long *res_rank = new long[n_parts];
-
- std::thread **threads = new std::thread*[n_parts];
- for (long t = 0; t < n_parts; ++t) {
- long part_beg = bv_section_beg + t * max_part_size;
- long part_end = std::min(part_beg + max_part_size, bv_section_end);
-
- threads[t] = new std::thread(lblock_handle_bv_part, part_beg, part_end, range_beg,
- range_gap, block_gap, bv, bv_ranksel, std::ref(res_sum[t]), std::ref(res_rank[t]));
- }
-
- for (long t = 0; t < n_parts; ++t) threads[t]->join();
- for (long t = 0; t < n_parts; ++t) delete threads[t];
- delete[] threads;
-
- // Update range_gap with values computed at part boundaries.
- for (long t = 0; t < n_parts; ++t)
- range_gap[res_rank[t] - range_beg] += res_sum[t];
- delete[] res_sum;
- delete[] res_rank;
-
- // 2.c
- //
- // Convert the range_gap to the slab of vbyte encoding.
- active_vbyte_slab_length = parallel_utils::convert_array_to_vbyte_slab(
- range_gap, range_size, active_vbyte_slab, max_threads);
-
- // 2.d
- //
- // Schedule asynchronous write of the slab.
- // First, wait for the I/O thread to finish writing.
- std::unique_lock<std::mutex> lk(mtx);
- while (avail == true)
- cv.wait(lk);
-
- // Set the new passive slab.
- std::swap(active_vbyte_slab, passive_vbyte_slab);
- passive_vbyte_slab_length = active_vbyte_slab_length;
-
- // Let the I/O thread know that the slab is waiting.
- avail = true;
- lk.unlock();
- cv.notify_one();
- }
-
- // Let the I/O thread know that we're done.
- std::unique_lock<std::mutex> lk(mtx);
- finished = true;
- lk.unlock();
- cv.notify_one();
-
- // Wait for the thread to finish.
- async_writer->join();
-
- // Clean up.
- delete async_writer;
- delete bv_ranksel;
- free(range_gap);
- free(active_vbyte_slab);
- free(passive_vbyte_slab);
-
- long double compute_gap_time = utils::wclock() - compute_gap_start;
- long double compute_gap_speed = (block_size / (1024.L * 1024)) / compute_gap_time;
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", compute_gap_time, compute_gap_speed);
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_COMPUTE_LEFT_GAP_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/compute_right_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/compute_right_gap.h
deleted file mode 100644
index 9b28d7fb..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/compute_right_gap.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/**
- * @file src/psascan_src/compute_right_gap.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_COMPUTE_RIGHT_GAP_H_INCLUDED
-#define __PSASCAN_SRC_COMPUTE_RIGHT_GAP_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "bitvector.h"
-#include "ranksel_support.h"
-#include "gap_array.h"
-#include "parallel_utils.h"
-
-
-namespace psascan_private {
-
-//==============================================================================
-// Compute the range_gap values corresponging to bv[part_beg..part_end).
-//==============================================================================
-void rblock_handle_bv_part(long part_beg, long part_end, long range_beg,
- long *range_gap, const gap_array_2n *block_gap, const bitvector *bv,
- const ranksel_support *bv_ranksel, long &res_sum, long &res_rank) {
- size_t excess_ptr = std::lower_bound(block_gap->m_excess.begin(),
- block_gap->m_excess.end(), part_beg) - block_gap->m_excess.begin();
-
- // Initialize j.
- long j = part_beg;
-
- // Compute gap[j].
- long gap_j = block_gap->m_count[j];
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) {
- ++excess_ptr;
- gap_j += (1L << 16);
- }
-
- // Initialize sum.
- long sum = gap_j;
-
- while (j != part_end - 1 && bv->get(j) == 0) {
- // Update j.
- ++j;
-
- // Compute gap[j].
- gap_j = block_gap->m_count[j];
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) {
- ++excess_ptr;
- gap_j += (1L << 16);
- }
-
- // Update sum.
- sum += gap_j;
- }
-
- // Store gap[part_beg] + .. + gap[j] and bv.rank(part_beg) (== bv.rank(j)).
- res_sum = sum;
- res_rank = bv_ranksel->rank(part_beg);
-
- if (j == part_end - 1)
- return;
-
- sum = 0L;
- long range_gap_ptr = res_rank + 1;
- while (j != part_end - 1) {
- // Update j.
- ++j;
-
- // Compute gap[j].
- gap_j = block_gap->m_count[j];
- while (excess_ptr < block_gap->m_excess.size() && block_gap->m_excess[excess_ptr] == j) {
- ++excess_ptr;
- gap_j += (1L << 16);
- }
-
- // Update sum.
- sum += gap_j;
-
- // Update range_gap.
- if (bv->get(j) == 1) {
- range_gap[range_gap_ptr - range_beg] = sum;
- ++range_gap_ptr;
- sum = 0L;
- }
- }
-
- if (bv->get(j) == 0)
- range_gap[range_gap_ptr - range_beg] = sum;
-}
-
-
-void rblock_async_write_code(unsigned char* &slab, long &length, std::mutex &mtx,
- std::condition_variable &cv, bool &avail, bool &finished, std::string filename) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(mtx);
- while (!avail && !finished)
- cv.wait(lk);
-
- if (!avail && finished) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- // Safely write the data to disk.
- utils::add_objects_to_file(slab, length, filename);
-
- // Let the caller know that the I/O thread finished writing.
- lk.lock();
- avail = false;
- lk.unlock();
- cv.notify_one();
- }
-}
-
-
-//==============================================================================
-// Given the gap array of the block (representation using 2 bytes per elements)
-// and the gap array of the left half-block wrt right half-block (bitvector
-// representation), compute the gap array (wrt tail) of the right half-block
-// and write to a given file using v-byte encoding.
-//
-// The whole computation is performed under given ram budget. It is fully
-// parallelized and uses asynchronous I/O as much as possible.
-//==============================================================================
-void compute_right_gap(long left_block_size, long right_block_size,
- const gap_array_2n *block_gap, bitvector *bv, std::string out_filename,
- long max_threads, long ram_budget) {
- long block_size = left_block_size + right_block_size;
- long right_gap_size = right_block_size + 1;
-
- // NOTE: we require that bv has room for one extra bit at the end
- // which we use as a sentinel. The actual value of that bit
- // prior to calling this function does not matter.
- bv->set(block_size);
- long bv_size = block_size + 1;
-
- fprintf(stderr, " Compute gap array for right half-block: ");
- long compute_gap_start = utils::wclock();
-
- //----------------------------------------------------------------------------
- // STEP 1: Preprocess left_block_gap_bv for rank and select queries,
- // i.e., compute sparse_gap.
- //----------------------------------------------------------------------------
- ranksel_support *bv_ranksel = new ranksel_support(bv, bv_size, max_threads);
-
-
- //----------------------------------------------------------------------------
- // STEP 2: compute the values of the right gap array, one range at a time.
- //----------------------------------------------------------------------------
- long max_range_size = std::max(1L, ram_budget / (3L * (long)sizeof(long)));
- long n_ranges = (right_gap_size + max_range_size - 1) / max_range_size;
-
- // To ensure that asynchronous I/O is really taking
- // place, we try to make 8 parts.
- if (n_ranges < 8L) {
- max_range_size = (right_gap_size + 7L) / 8L;
- n_ranges = (right_gap_size + max_range_size - 1) / max_range_size;
- }
-
- long *range_gap = (long *)malloc(max_range_size * sizeof(long));
- unsigned char *active_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long));
- unsigned char *passive_vbyte_slab = (unsigned char *)malloc(max_range_size * sizeof(long));
- long active_vbyte_slab_length;
- long passive_vbyte_slab_length;
-
- // Used for communication with thread doing asynchronous writes.
- std::mutex mtx;
- std::condition_variable cv;
- bool avail = false;
- bool finished = false;
-
- // Start the thread doing asynchronous writes.
- std::thread *async_writer = new std::thread(rblock_async_write_code,
- std::ref(passive_vbyte_slab), std::ref(passive_vbyte_slab_length),
- std::ref(mtx), std::ref(cv), std::ref(avail), std::ref(finished),
- out_filename);
-
- for (long range_id = 0L; range_id < n_ranges; ++range_id) {
- // Compute the range [range_beg..range_end) of values in the right gap
- // array (which is indexed [0..right_gap_size)).
- long range_beg = range_id * max_range_size;
- long range_end = std::min(range_beg + max_range_size, right_gap_size);
- long range_size = range_end - range_beg;
-
- // 2.a
- //
- // Find the section in the bitvector that contains
- // the bits necessary to compute the answer.
- long bv_section_beg = 0L;
- long bv_section_end = 0L;
- if (range_beg > 0)
- bv_section_beg = bv_ranksel->select1(range_beg - 1) + 1;
- bv_section_end = bv_ranksel->select1(range_end - 1) + 1;
- long bv_section_size = bv_section_end - bv_section_beg;
-
- // Split the current bitvector section into
- // equal parts. Each thread handles one part.
- long max_part_size = (bv_section_size + max_threads - 1) / max_threads;
- long n_parts = (bv_section_size + max_part_size - 1) / max_part_size;
-
- parallel_utils::parallel_fill<long>(range_gap, range_size, 0L, max_threads);
-
- // Allocate arrays used to store the answers for part boundaries.
- long *res_sum = new long[n_parts];
- long *res_rank = new long[n_parts];
-
- std::thread **threads = new std::thread*[n_parts];
- for (long t = 0; t < n_parts; ++t) {
- long part_beg = bv_section_beg + t * max_part_size;
- long part_end = std::min(part_beg + max_part_size, bv_section_end);
-
- threads[t] = new std::thread(rblock_handle_bv_part, part_beg, part_end, range_beg,
- range_gap, block_gap, bv, bv_ranksel, std::ref(res_sum[t]), std::ref(res_rank[t]));
- }
-
- for (long t = 0; t < n_parts; ++t) threads[t]->join();
- for (long t = 0; t < n_parts; ++t) delete threads[t];
- delete[] threads;
-
- // Update range_gap with values computed at part boundaries.
- for (long t = 0; t < n_parts; ++t)
- range_gap[res_rank[t] - range_beg] += res_sum[t];
- delete[] res_sum;
- delete[] res_rank;
-
- // 2.c
- //
- // Convert the range_gap to the slab of vbyte encoding.
- active_vbyte_slab_length = parallel_utils::convert_array_to_vbyte_slab(
- range_gap, range_size, active_vbyte_slab, max_threads);
-
- // 2.d
- //
- // Schedule asynchronous write of the slab.
- // First, wait for the I/O thread to finish writing.
- std::unique_lock<std::mutex> lk(mtx);
- while (avail == true)
- cv.wait(lk);
-
- // Set the new passive slab.
- std::swap(active_vbyte_slab, passive_vbyte_slab);
- passive_vbyte_slab_length = active_vbyte_slab_length;
-
- // Let the I/O thread know that the slab is waiting.
- avail = true;
- lk.unlock();
- cv.notify_one();
- }
-
- // Let the I/O thread know that we're done.
- std::unique_lock<std::mutex> lk(mtx);
- finished = true;
- lk.unlock();
- cv.notify_one();
-
- // Wait for the thread to finish.
- async_writer->join();
-
- // Clean up.
- delete async_writer;
- delete bv_ranksel;
- free(range_gap);
- free(active_vbyte_slab);
- free(passive_vbyte_slab);
-
- long double compute_gap_time = utils::wclock() - compute_gap_start;
- long double compute_gap_speed = (block_size / (1024.L * 1024)) / compute_gap_time;
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", compute_gap_time, compute_gap_speed);
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_COMPUTE_RIGHT_GAP_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/distributed_file.h b/exttools/pSAscan-0.1.0/src/psascan_src/distributed_file.h
deleted file mode 100644
index 6d77cecd..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/distributed_file.h
+++ /dev/null
@@ -1,360 +0,0 @@
-/**
- * @file src/psascan_src/distributed_file.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_DISTRIBUTED_FILE_H_INCLUDED
-#define __PSASCAN_SRC_DISTRIBUTED_FILE_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include <thread>
-#include <mutex>
-#include <algorithm>
-#include <condition_variable>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-template<typename value_type>
-struct distributed_file {
- distributed_file(std::string filename_base, long max_bytes) {
- m_state = STATE_INIT;
- m_max_items = std::max(1UL, max_bytes / sizeof(value_type));
- m_filename = filename_base + ".distrfile." + utils::random_string_hash();
- }
-
- distributed_file(std::string filename_base, long max_bytes,
- const value_type *begin, const value_type *end) {
- m_state = STATE_INIT;
- m_max_items = std::max(1UL, max_bytes / sizeof(value_type));
- m_filename = filename_base + ".distrfile." + utils::random_string_hash();
-
- initialize_writing();
- write(begin, end);
- finish_writing();
- }
-
-
- void initialize_writing() {
- if (m_state != STATE_INIT) {
- fprintf(stderr, "\nError: initializing writing in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- m_state = STATE_WRITING;
- m_total_write = 0;
- m_files_cnt = 0;
- make_new_file();
- }
-
- void write(const value_type *begin, const value_type *end) {
- if (m_state != STATE_WRITING) {
- fprintf(stderr, "\nError: write in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- // Fill the current file.
- if (m_cur_file_write != m_max_items) {
- long left = m_max_items - m_cur_file_write;
- long towrite = std::min(left, end - begin);
- utils::add_objects_to_file(begin, towrite, m_file);
- m_cur_file_write += towrite;
- m_total_write += towrite;
- begin += towrite;
- }
-
- // Write remaining items.
- while (begin < end) {
- std::fclose(m_file);
- make_new_file();
-
- long towrite = std::min(m_max_items, end - begin);
- utils::add_objects_to_file(begin, towrite, m_file);
- m_cur_file_write += towrite;
- m_total_write += towrite;
- begin += towrite;
- }
- }
-
- void finish_writing() {
- if (m_state != STATE_WRITING) {
- fprintf(stderr, "\nError: finishing writing when in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
- if (m_cur_file_write == 0) {
- fprintf(stderr, "\nError: nothing was ever written to %s\n", m_filename.c_str());
- std::exit(EXIT_FAILURE);
- }
-
- std::fclose(m_file);
- m_state = STATE_WRITTEN;
- }
-
- void initialize_reading(long bufsize = (4 << 20)) {
- if (m_state != STATE_WRITTEN) {
- fprintf(stderr, "\nError: initializing reading in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- // Compute buffer size.
- m_state = STATE_READING;
- long items = std::max(2UL, (bufsize + sizeof(value_type) - 1) / sizeof(value_type));
- m_buf_size = items / 2L;
-
- // Reset counters.
- m_active_buf_filled = 0;
- m_passive_buf_filled = 0;
- m_active_buf_pos = 0;
- m_total_read_buf = 0;
- m_total_read_user = 0;
- m_cur_file = -1;
-
- // Initialize buffers.
- m_active_buf = (value_type *)malloc(m_buf_size * sizeof(value_type));
- m_passive_buf = (value_type *)malloc(m_buf_size * sizeof(value_type));
-
- // Start the I/O thread and immediatelly start reading.
- m_avail = true;
- m_finished = false;
- m_thread = new std::thread(async_io_code<value_type>, this);
- }
-
- inline value_type read() {
- if (m_state != STATE_READING) {
- fprintf(stderr, "\nError: reading in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- if (m_active_buf_pos == m_active_buf_filled)
- receive_new_buffer();
-
- m_total_read_user++;
- return m_active_buf[m_active_buf_pos++];
- }
-
- void finish_reading() {
- if (m_state != STATE_READING) {
- fprintf(stderr, "\nError: finishing reading in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- if (m_total_read_buf != m_total_read_user || m_total_read_user != m_total_write) {
- fprintf(stderr, "\nError: not all elems were read from distributed file %s\n", m_filename.c_str());
- std::exit(EXIT_FAILURE);
- }
-
- // Let the I/O thread know that we are done.
- std::unique_lock<std::mutex> lk(m_mutex);
- m_finished = true;
- lk.unlock();
- m_cv.notify_one();
-
- // Wait for the thread to finish.
- m_thread->join();
-
- // Clean up.
- delete m_thread;
- close_and_destroy_cur_file();
- free(m_active_buf);
- free(m_passive_buf);
-
- // Enter the terminal state.
- m_state = STATE_READ;
- }
-
- std::string state_string() const {
- switch(m_state) {
- case STATE_INIT: return "STATE_INIT";
- case STATE_WRITING: return "STATE_WRITING";
- case STATE_WRITTEN: return "STATE_WRITTEN";
- case STATE_READING: return "STATE_READING";
- case STATE_READ: return "STATE_READ";
- default: return "undefined state";
- }
- }
-
- void close_and_destroy_cur_file() {
- if (m_state != STATE_READING) {
- fprintf(stderr, "\nError: destroying a file in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- if (!m_file) {
- fprintf(stderr, "\nError: deleting a NULL file\n");
- std::exit(EXIT_FAILURE);
- }
-
- std::fclose(m_file);
- std::string cur_fname = m_filename + ".part" + utils::intToStr(m_cur_file);
- utils::file_delete(cur_fname);
- }
-
- template<typename T>
- static void async_io_code(distributed_file<T> *file) {
- while (true) {
- // Wait until the passive buffer is available.
- std::unique_lock<std::mutex> lk(file->m_mutex);
- while (!(file->m_avail) && !(file->m_finished))
- file->m_cv.wait(lk);
-
- if (!(file->m_avail) && (file->m_finished)) {
- // We're done, terminate the thread.
- lk.unlock();
- return;
- }
- lk.unlock();
-
- // This should never happen.
- if (file->m_total_read_buf == file->m_total_write) {
- fprintf(stderr, "\nError: trying to read past the end of file\n");
- std::exit(EXIT_FAILURE);
- }
-
- // Safely process the passive buffer.
- // Check if we need to open next file.
- if (file->m_cur_file == -1 || file->m_cur_file_read == file->m_max_items) {
- if (file->m_cur_file != -1)
- file->close_and_destroy_cur_file();
- file->open_next_file();
- }
-
- // Read the data from disk.
- long file_left = file->m_max_items - file->m_cur_file_read;
- long items_left = file->m_total_write - file->m_total_read_buf;
- long left = std::min(file_left, items_left);
- file->m_passive_buf_filled = std::min(left, file->m_buf_size);
- file->m_cur_file_read += file->m_passive_buf_filled;
- file->m_total_read_buf += file->m_passive_buf_filled;
- utils::read_n_objects_from_file(file->m_passive_buf,
- file->m_passive_buf_filled, file->m_file);
-
- // Let the caller know that the I/O thread finished reading.
- lk.lock();
- file->m_avail = false;
- lk.unlock();
- file->m_cv.notify_one();
- }
- }
-
- void receive_new_buffer() {
- if (m_state != STATE_READING) {
- fprintf(stderr, "\nError: refilling in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- // Wait until the I/O thread finishes reading the revious
- // buffer. Most of the time this step is instantaneous.
- std::unique_lock<std::mutex> lk(m_mutex);
- while (m_avail == true)
- m_cv.wait(lk);
-
- // Set the new active buffer.
- std::swap(m_active_buf, m_passive_buf);
- m_active_buf_filled = m_passive_buf_filled;
- m_active_buf_pos = 0;
-
- // Let the I/O thead know that it can now
- // prefetch another buffer.
- m_avail = (m_total_read_buf < m_total_write);
- lk.unlock();
- m_cv.notify_one();
- }
-
- void open_next_file() {
- if (m_state != STATE_READING) {
- fprintf(stderr, "\nError: opening a new file in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- ++m_cur_file;
- m_file = utils::open_file(m_filename + ".part" + utils::intToStr(m_cur_file), "r");
- m_cur_file_read = 0;
- }
-
- void make_new_file() {
- if (m_state != STATE_WRITING) {
- fprintf(stderr, "\nError: making new file in state %s\n", state_string().c_str());
- std::exit(EXIT_FAILURE);
- }
-
- m_file = utils::open_file(m_filename + ".part" + utils::intToStr(m_files_cnt), "w");
- ++m_files_cnt;
- m_cur_file_write = 0;
- }
-
-
- enum { STATE_INIT, // right after creating (before init_writing)
- STATE_WRITING, // after initialize_writing, writing possible
- STATE_WRITTEN, // after finish_writing, waiting for initialize_reading
- STATE_READING, // after initialize_reading, reading possible
- STATE_READ // after finish_reading, waiting for death
- } m_state;
-
- std::FILE *m_file; // file handler
- std::string m_filename; // file name base
- long m_max_items; // max items per file
-
- // Buffers used for asynchronous reading.
- value_type *m_active_buf;
- value_type *m_passive_buf;
- long m_buf_size;
- long m_active_buf_pos;
- long m_active_buf_filled;
- long m_passive_buf_filled;
-
- // Various housekeeping statistics about the number of items.
- long m_cur_file_write; // number of items written to a current file
- long m_total_write; // total number of written items
- long m_cur_file_read; // number of items read from the current file
- long m_total_read_buf; // total number of items read from files into buffers
- long m_total_read_user; // total number of items read by the user
-
- // Used to keep track of file count.
- long m_files_cnt; // counts the files during writing
- long m_cur_file; // iterates through [0..m_files_cnt) during reading
-
- // For synchronization with thread doing asynchronous reading.
- std::thread *m_thread;
- std::mutex m_mutex;
- std::condition_variable m_cv;
- bool m_finished;
- bool m_avail;
-};
-
-} // psascan_private
-
-#endif // __PSASCAN_SRC_DISTRIBUTED_FILE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/em_compute_initial_ranks.h b/exttools/pSAscan-0.1.0/src/psascan_src/em_compute_initial_ranks.h
deleted file mode 100644
index 5ef2858e..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/em_compute_initial_ranks.h
+++ /dev/null
@@ -1,567 +0,0 @@
-/**
- * @file src/psascan_src/em_compute_initial_ranks.h
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_EM_COMPUTE_INITIAL_RANKS_INCLUDED
-#define __PSASCAN_SRC_EM_COMPUTE_INITIAL_RANKS_INCLUDED
-
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <thread>
-
-#include "approx_rank.h"
-#include "sparse_isa.h"
-#include "background_block_reader.h"
-#include "background_chunk_reader.h"
-#include "multifile_bit_stream_reader.h"
-#include "utils.h"
-
-
-namespace psascan_private {
-
-// #define EM_STARTING_POS_MODULE_DEBUG_MODE
-
-inline int lcp_compare(
- const unsigned char *text, // only text[block_suf_beg..block_end) will be accessed
- long text_length,
- long block_end, // wrt to text beg
- long block_suf_beg, // wrt to text beg
- const unsigned char *pat, // only pat[lcp..pat_length) will be accessed
- long pat_beg, // wrt to text beg
- long pat_length,
- multifile_bit_stream_reader &gt_reader,
- long &lcp) {
- while (block_suf_beg + lcp < block_end && lcp < pat_length &&
- text[block_suf_beg + lcp] == pat[lcp]) ++lcp;
- if (block_suf_beg + lcp >= block_end) {
- if (gt_reader.access(text_length - (pat_beg + (block_end - block_suf_beg)))) return 1;
- else return -1;
- } else if (lcp == pat_length) {
- if (pat_beg + pat_length >= text_length) return -1;
- else return 0;
- } else {
- if (pat[lcp] > text[block_suf_beg + lcp]) return 1;
- else return -1;
- }
-}
-
-template<typename saidx_t>
-void refine_range(
- const unsigned char *block,
- const saidx_t *block_psa,
- long block_beg, // wrt to text beg
- long block_end, // same here
- long pat_beg, // same here
- long text_length,
- long left,
- long right,
- long old_lcp,
- long new_lcp,
- const unsigned char *pat, // only pat[old_lcp..new_lcp) can and will be accessed
- multifile_bit_stream_reader &gt_reader,
- long &newleft,
- long &newright) {
- long low = left - 1;
- long high = right;
- long llcp = old_lcp;
- long rlcp = old_lcp;
-
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE
- long min_discrepancy = utils::random_long(0L, 10L);
- long balancing_factor = utils::random_long(1L, 10L);
-#else
- static const long min_discrepancy = (1L << 16);
- static const long balancing_factor = 64L;
-#endif
-
- const unsigned char *text = block - block_beg;
- while (low + 1 != high) {
- // Invariant: newleft is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare(text, text_length, block_end, block_beg + (long)block_psa[mid],
- pat, pat_beg, new_lcp, gt_reader, lcp) <= 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- newleft = high;
-
- if (rlcp >= new_lcp) {
- high = right;
- rlcp = old_lcp;
-
- while (low + 1 != high) {
- // Invariant: newright is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare(text, text_length, block_end, block_beg + (long)block_psa[mid],
- pat, pat_beg, new_lcp, gt_reader, lcp) < 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- }
- newright = high;
-}
-
-template<typename saidx_t>
-void em_compute_single_initial_rank(
- const unsigned char *block,
- const saidx_t *block_psa,
- long block_beg, // wrt to text beg
- long block_end, // same here
- long pat_beg, // same here
- long text_length,
- long max_lcp,
- std::string text_filename,
- const multifile *tail_gt_begin_reversed,
- std::pair<long, long> &result) {
- if (pat_beg == text_length) {
- result = std::make_pair(0, 0);
- return;
- }
-
- long block_size = block_end - block_beg;
- long pat_end = pat_beg + max_lcp;
-
- multifile_bit_stream_reader gt_reader(tail_gt_begin_reversed);
-
- // Reads text[pat_beg..pat_end) in chunks.
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE
- long chunk_length = utils::random_long(1L, 10L);
- background_chunk_reader *chunk_reader =
- new background_chunk_reader(text_filename, pat_beg, pat_end, chunk_length);
-#else
- background_chunk_reader *chunk_reader =
- new background_chunk_reader(text_filename, pat_beg, pat_end);
-#endif
-
- // The current range is [left, right).
- long left = 0;
- long right = block_size;
- long lcp = 0;
-
- while (left != right && lcp < max_lcp) {
- long this_chunk_length = std::min(max_lcp - lcp, chunk_reader->get_chunk_size());
- long new_lcp = lcp + this_chunk_length;
- chunk_reader->wait(pat_beg + new_lcp);
-
- // Invariant:
- // reader->chunk[0..chunk_length) = pattern[lcp..new_lcp).
- long newleft = 0;
- long newright = 0;
- refine_range(block, block_psa, block_beg, block_end, pat_beg, text_length, left,
- right, lcp, new_lcp, chunk_reader->m_chunk - lcp, gt_reader, newleft, newright);
- left = newleft;
- right = newright;
- lcp = new_lcp;
- }
-
- delete chunk_reader;
-
- result = std::make_pair(left, right);
-}
-
-template<typename saidx_t>
-void em_compute_initial_ranks(
- const unsigned char *block,
- const saidx_t *block_psa,
- const unsigned char *block_pbwt,
- long i0,
- long block_beg, // wrt to text beg
- long block_end, // same here
- long text_length,
- std::string text_filename,
- const multifile *tail_gt_begin_reversed,
- std::vector<long> &result,
- long max_threads,
- long tail_end,
- long initial_rank_after_tail) {
- // Note, that bits of tail_gt_begin_reversed are indexed in the
- // range [text_length - tail_end.. text_length - block_end). This
- // is because the same multifile is then used in the streaming and
- // for streaming is much more natural to use this indexing.
- long block_length = block_end - block_beg;
- long tail_length = tail_end - block_end;
- long stream_max_block_size = (tail_length + max_threads - 1) / max_threads;
- long n_threads = (tail_length + stream_max_block_size - 1) / stream_max_block_size;
-
- std::vector<std::pair<long, long> > ranges(n_threads);
- std::thread **threads = new std::thread*[n_threads];
-
- for (int t = n_threads - 1; t >= 0; --t) {
- long stream_block_beg = block_end + t * stream_max_block_size;
- long stream_block_end = std::min(stream_block_beg + stream_max_block_size, tail_end);
- long stream_block_size = stream_block_end - stream_block_beg;
-
- threads[t] = new std::thread(em_compute_single_initial_rank<saidx_t>,
- block, block_psa, block_beg, block_end, stream_block_beg, text_length,
- stream_block_size, text_filename, tail_gt_begin_reversed, std::ref(ranges[t]));
- }
-
- for (int t = 0; t < n_threads; ++t) threads[t]->join();
- for (int t = 0; t < n_threads; ++t) delete threads[t];
- delete[] threads;
-
- // Refine ranges until all are single elements.
- result.resize(n_threads);
-
- bool nontrivial_range = false;
- for (long t = 0; t < n_threads; ++t)
- if (ranges[t].first != ranges[t].second)
- nontrivial_range = true;
-
- if (nontrivial_range) {
- multifile_bit_stream_reader *gt_reader =
- new multifile_bit_stream_reader(tail_gt_begin_reversed);
-
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE
- typedef approx_rank<1L> rank_type;
- typedef sparse_isa<rank_type, saidx_t, 1L> isa_type;
-#else
- typedef approx_rank<8L> rank_type;
- typedef sparse_isa<rank_type, saidx_t, 8L> isa_type;
-#endif
- rank_type *pbwt_rank = new rank_type(block_pbwt, block_length, max_threads);
- isa_type *block_sparse_isa = new isa_type(block_psa, block, block_length, i0, pbwt_rank, max_threads);
-
- long prev_rank = initial_rank_after_tail;
- for (long t = n_threads - 1; t >= 0; --t) {
- long stream_block_beg = block_end + t * stream_max_block_size;
- long stream_block_end = std::min(stream_block_beg + stream_max_block_size, tail_end);
- long stream_block_size = stream_block_end - stream_block_beg;
-
- long left = ranges[t].first;
- long right = ranges[t].second;
-
- while (left != right) {
- // Valid values for mid are in [left..right).
- long mid = (left + right) / 2;
-
- if ((long)block_psa[mid] + stream_block_size >= block_length) {
- if (gt_reader->access(text_length - (stream_block_beg + (block_length - (long)block_psa[mid])))) left = mid + 1;
- else right = mid;
- } else {
- long j = (long)block_psa[mid] + stream_block_size;
- if (block_sparse_isa->query(j) < prev_rank) left = mid + 1;
- else right = mid;
- }
- }
-
- result[t] = left;
- prev_rank = result[t];
- }
-
- delete pbwt_rank;
- delete block_sparse_isa;
- delete gt_reader;
- } else {
- for (long t = 0; t < n_threads; ++t)
- result[t] = ranges[t].first;
- }
-}
-
-int lcp_compare_2(
- const unsigned char *text, // only text[block_suf_beg..block_end) will be accessed
- long text_length,
- long block_end, // wrt to text beg
- long block_suf_beg, // wrt to text beg
- const unsigned char *pat, // only pat[lcp..pat_length) will be accessed
- long pat_beg, // wrt to text beg
- long pat_length,
- long tail_begin, // wrt to text beg
- background_block_reader *mid_block_reader,
- multifile_bit_stream_reader &gt_reader,
- long &lcp) {
- while (block_suf_beg + lcp < block_end && lcp < pat_length &&
- text[block_suf_beg + lcp] == pat[lcp]) ++lcp;
- if (block_suf_beg + lcp < block_end && lcp < pat_length) {
- if (pat[lcp] > text[block_suf_beg + lcp]) return 1;
- else return -1;
- }
-
- if (block_suf_beg + lcp >= block_end && block_end < tail_begin && lcp < pat_length) {
- // To finish the comparison, we need to access symbols from the mid block.
- // First, wait until enough symbols are available.
- mid_block_reader->wait(std::min(tail_begin, block_suf_beg + pat_length) - block_end);
-
- // Now continue the comparison.
- const unsigned char *text2 = mid_block_reader->m_data - block_end;
- while (block_suf_beg + lcp < tail_begin && lcp < pat_length &&
- text2[block_suf_beg + lcp] == pat[lcp]) ++lcp;
- if (block_suf_beg + lcp < tail_begin && lcp < pat_length) {
- if (pat[lcp] > text2[block_suf_beg + lcp]) return 1;
- else return -1;
- }
- }
-
- if (block_suf_beg + lcp >= tail_begin) {
- // Use gt to resolve comparison.
- if (gt_reader.access(text_length - (pat_beg + (tail_begin - block_suf_beg)))) return 1;
- else return -1;
- } else { // lcp == pat_length
- if (pat_beg + pat_length >= text_length) return -1;
- else return 0;
- }
-}
-
-template<typename saidx_t>
-void refine_range_2(
- const unsigned char *block,
- const saidx_t *block_psa,
- long block_beg, // wrt to text beg
- long block_end, // same here
- long pat_beg, // same here
- long tail_begin,
- background_block_reader *mid_block_reader,
- long text_length,
- long left,
- long right,
- long old_lcp,
- long new_lcp,
- const unsigned char *pat, // only pat[old_lcp..new_lcp) can and will be accessed
- multifile_bit_stream_reader &gt_reader,
- long &newleft,
- long &newright) {
- long low = left - 1;
- long high = right;
- long llcp = old_lcp;
- long rlcp = old_lcp;
-
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE
- long min_discrepancy = utils::random_long(0L, 10L);
- long balancing_factor = utils::random_long(1L, 10L);
-#else
- static const long min_discrepancy = (1L << 16);
- static const long balancing_factor = 64L;
-#endif
-
- const unsigned char *text = block - block_beg;
- while (low + 1 != high) {
- // Invariant: newleft is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare_2(text, text_length, block_end, block_beg + (long)block_psa[mid],
- pat, pat_beg, new_lcp, tail_begin, mid_block_reader, gt_reader, lcp) <= 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- newleft = high;
-
- if (rlcp >= new_lcp) {
- high = right;
- rlcp = old_lcp;
-
- while (low + 1 != high) {
- // Invariant: newright is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare_2(text, text_length, block_end, block_beg + (long)block_psa[mid],
- pat, pat_beg, new_lcp, tail_begin, mid_block_reader, gt_reader, lcp) < 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- }
- newright = high;
-}
-
-template<typename saidx_t>
-void em_compute_single_initial_rank_2(
- const unsigned char *block,
- const saidx_t *block_psa,
- long block_beg, // wrt to text beg
- long block_end, // same here
- long pat_beg, // same here
- long text_length,
- long max_lcp,
- long tail_begin,
- background_block_reader *mid_block_reader,
- std::string text_filename,
- const multifile *tail_gt_begin_reversed,
- long &result) {
- if (pat_beg == text_length) {
- result = 0;
- return;
- }
-
- long block_size = block_end - block_beg;
- long pat_end = std::min(text_length, pat_beg + max_lcp);
-
- multifile_bit_stream_reader gt_reader(tail_gt_begin_reversed);
-
- // Reads text[pat_beg..pat_end) in chunks.
-#ifdef EM_STARTING_POS_MODULE_DEBUG_MODE
- long chunk_length = utils::random_long(1L, 10L);
- background_chunk_reader *chunk_reader =
- new background_chunk_reader(text_filename, pat_beg, pat_end, chunk_length);
-#else
- background_chunk_reader *chunk_reader =
- new background_chunk_reader(text_filename, pat_beg, pat_end);
-#endif
-
- // The current range is [left, right).
- long left = 0;
- long right = block_size;
- long lcp = 0;
-
- while (left != right && lcp < max_lcp) {
- long this_chunk_length = std::min(max_lcp - lcp, chunk_reader->get_chunk_size());
- long new_lcp = lcp + this_chunk_length;
- chunk_reader->wait(pat_beg + new_lcp);
-
- // Invariant:
- // reader->chunk[0..chunk_length) = pattern[lcp..new_lcp).
- long newleft = 0;
- long newright = 0;
- refine_range_2(block, block_psa, block_beg, block_end, pat_beg, tail_begin,
- mid_block_reader, text_length, left, right, lcp, new_lcp,
- chunk_reader->m_chunk - lcp, gt_reader, newleft, newright);
- left = newleft;
- right = newright;
- lcp = new_lcp;
- }
- result = left;
-
- delete chunk_reader;
-}
-
-template<typename saidx_t>
-void em_compute_initial_ranks(
- const unsigned char *block,
- const saidx_t *block_psa,
- long block_beg, // wrt to text beg
- long block_end, // same here
- long text_length,
- std::string text_filename,
- const multifile *tail_gt_begin_reversed,
- std::vector<long> &result,
- long max_threads,
- long tail_begin) {
- // Compute some initial parameters.
- long block_length = block_end - block_beg;
- long tail_length = text_length - tail_begin;
- long mid_block_beg = block_end;
- long mid_block_end = tail_begin;
- long mid_block_size = mid_block_end - mid_block_beg;
- long stream_max_block_size = (tail_length + max_threads - 1) / max_threads;
- long n_threads = (tail_length + stream_max_block_size - 1) / stream_max_block_size;
-
- // Start reading the text between the block and the tail in the backgrond.
- background_block_reader *mid_block_reader =
- new background_block_reader(text_filename, mid_block_beg, mid_block_size);
-
- // Compute the initial ranks.
- std::vector<long> res(n_threads);
- std::thread **threads = new std::thread*[n_threads];
-
- for (int t = 0; t < n_threads; ++t) {
- long stream_block_beg = tail_begin + t * stream_max_block_size;
- long max_lcp = std::min(block_length + mid_block_size, text_length - stream_block_beg);
-
- threads[t] = new std::thread(em_compute_single_initial_rank_2<saidx_t>,
- block, block_psa, block_beg, block_end, stream_block_beg, text_length,
- max_lcp, tail_begin, mid_block_reader, text_filename,
- tail_gt_begin_reversed, std::ref(res[t]));
- }
-
- for (int t = 0; t < n_threads; ++t) threads[t]->join();
- for (int t = 0; t < n_threads; ++t) delete threads[t];
- delete[] threads;
-
- mid_block_reader->stop();
- delete mid_block_reader;
-
- result = res;
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_EM_COMPUTE_INITIAL_RANKS_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/gap_array.h b/exttools/pSAscan-0.1.0/src/psascan_src/gap_array.h
deleted file mode 100644
index 364fab2f..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/gap_array.h
+++ /dev/null
@@ -1,535 +0,0 @@
-/**
- * @file src/psascan_src/gap_array.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_GAP_ARRAY_H_INCLUDED
-#define __PSASCAN_SRC_GAP_ARRAY_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <vector>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <algorithm>
-#include <parallel/algorithm>
-
-#include "utils.h"
-#include "bitvector.h"
-#include "parallel_utils.h"
-#include "async_stream_writer.h"
-
-
-namespace psascan_private {
-
-struct buffered_gap_array {
- buffered_gap_array(long length, std::string storage_fname = std::string("")) {
- if (length <= 0L) {
- fprintf(stderr, "\nError: attempting to construct empty gap array.\n");
- std::exit(EXIT_FAILURE);
- }
-
- m_length = length;
- m_count = (unsigned char *)malloc(m_length);
- std::fill(m_count, m_count + m_length, 0);
-
- m_excess = new long[k_excess_limit];
-
- // File used to store excess values.
- m_storage_filename = storage_fname;
- if (!m_storage_filename.length())
- m_storage_filename = ".excess." + utils::random_string_hash();
-
- m_excess_filled = 0L;
- m_excess_disk = 0L;
- m_sorted_excess = NULL;
- m_sequential_read_initialized = false;
- }
-
- void add_excess(long x) {
- m_excess[m_excess_filled++] = x;
- if (m_excess_filled == k_excess_limit) {
- m_gap_writing_mutex.lock();
- m_excess_disk += m_excess_filled;
- utils::add_objects_to_file(m_excess, m_excess_filled, m_storage_filename);
- m_excess_filled = 0L;
- m_gap_writing_mutex.unlock();
- }
- }
-
- void flush_excess_to_disk() {
- if (m_excess_filled > 0) {
- utils::add_objects_to_file(m_excess, m_excess_filled, m_storage_filename);
- m_excess_disk += m_excess_filled;
- m_excess_filled = 0L;
- }
- }
-
- void start_sequential_access() {
- if (!m_sequential_read_initialized) {
- m_sequential_read_initialized = true;
- m_total_excess = m_excess_filled + m_excess_disk;
- m_sorted_excess = (long *)malloc(m_total_excess * sizeof(long));
- std::copy(m_excess, m_excess + m_excess_filled, m_sorted_excess);
- if (m_excess_disk > 0L) {
- long *dest = m_sorted_excess + m_excess_filled;
- long toread = m_excess_disk;
- utils::read_n_objects_from_file(dest, toread, m_storage_filename.c_str());
- }
- std::sort(m_sorted_excess, m_sorted_excess + m_total_excess);
- }
-
- m_excess_ptr = 0;
- m_current_pos = 0;
- }
-
- inline long get_next() {
- long c = 0;
- while (m_excess_ptr < m_total_excess && m_sorted_excess[m_excess_ptr] == m_current_pos)
- ++m_excess_ptr, ++c;
- long result = c * 256L + m_count[m_current_pos];
-
- ++m_current_pos;
- return result;
- }
-
- void stop_sequential_access() {
- if (m_sequential_read_initialized) {
- free(m_sorted_excess);
- m_sequential_read_initialized = false;
- } else {
- fprintf(stderr, "\nError: attempting to stop sequential "
- "access to the gap array before it was initialized.\n");
- std::exit(EXIT_FAILURE);
- }
- }
-
- std::mutex m_excess_mutex;
- std::mutex m_gap_writing_mutex;
-
- ~buffered_gap_array() {
- if (m_sequential_read_initialized) {
- fprintf(stderr, "\nError: sequential access to gap was not terminated.");
- std::exit(EXIT_FAILURE);
- }
-
- free(m_count);
- delete[] m_excess;
- }
-
- void erase_disk_excess() {
- if (utils::file_exists(m_storage_filename))
- utils::file_delete(m_storage_filename);
- }
-
- // Write to a given file using v-byte encoding.
- void save_to_file(std::string fname) {
- fprintf(stderr, " Write gap to file: ");
- long double gap_write_start = utils::wclock();
- long bytes_written = 0L;
-
- start_sequential_access();
- typedef async_stream_writer<unsigned char> stream_writer_type;
- stream_writer_type *writer = new stream_writer_type(fname);
-
- for (long j = 0; j < m_length; ++j) {
- long val = get_next();
- while (val > 127) {
- writer->write((val & 0x7f) | 0x80);
- val >>= 7;
- ++bytes_written;
- }
- writer->write(val);
- }
-
- bytes_written += m_length;
- stop_sequential_access();
- delete writer;
-
- long double gap_write_time = utils::wclock() - gap_write_start;
- long double io_speed = (bytes_written / (1024.L * 1024)) / gap_write_time;
- fprintf(stderr, "%.2Lf (%.2LfMiB/s)\n", gap_write_time, io_speed);
- }
-
-
- //==============================================================================
- // Note about the input:
- // - j is the maximal integer such that gapsum[j] + j <= beg.
- // - S contains value gapsum[j] + j.
- //==============================================================================
- static void convert_gap_to_bitvector_aux(long beg, long end, long j, long S, buffered_gap_array *gap, bitvector *bv) {
- // Initialize pointer to sorted excess values.
- long excess_pointer = std::lower_bound(gap->m_sorted_excess,
- gap->m_sorted_excess + gap->m_total_excess, j) - gap->m_sorted_excess;
-
- // Compute gap[j].
- long gap_j = gap->m_count[j];
- while (excess_pointer < gap->m_total_excess && gap->m_sorted_excess[excess_pointer] == j) {
- gap_j += 256L;
- ++excess_pointer;
- }
-
- long p = beg;
- long ones = std::min(end - p, gap_j - (beg - S));
- for (long k = 0; k < ones; ++k) bv->set(p++);
- ++j;
-
- while (p < end) {
- ++p;
-
- // Compute gap[j].
- gap_j = gap->m_count[j];
- while (excess_pointer < gap->m_total_excess && gap->m_sorted_excess[excess_pointer] == j) {
- gap_j += 256L;
- ++excess_pointer;
- }
-
- ones = std::min(end - p, gap_j);
-
- for (long k = 0; k < ones; ++k) bv->set(p++);
- ++j;
- }
- }
-
- static void compute_j_aux(long range_beg, long n_chunks, long max_chunk_size,
- const long *sparse_gapsum, long &initial_gap_ptr, long &initial_gapsum_value, const buffered_gap_array *gap) {
- // Fast forward through as many chunks as possible.
- long j = 0L;
- long gapsum_j = 0L; // At any time gapsum_j = gap[0] + .. + gap[j - 1].
- while (j + 1 < n_chunks && sparse_gapsum[j + 1] + (max_chunk_size * (j + 1)) <= range_beg) ++j;
- gapsum_j = sparse_gapsum[j];
- j = (j * max_chunk_size);
-
- // Slowly find the right place in a single chunk.
- long excess_ptr = std::lower_bound(gap->m_sorted_excess, gap->m_sorted_excess + gap->m_total_excess, j) - gap->m_sorted_excess;
- while (j < gap->m_length) {
- long gap_j = gap->m_count[j];
- while (excess_ptr < gap->m_total_excess && gap->m_sorted_excess[excess_ptr] == j) {
- gap_j += 256L;
- ++excess_ptr;
- }
-
- if (gapsum_j + gap_j + j + 1 <= range_beg) {
- gapsum_j += gap_j;
- ++j;
- } else break;
- }
-
- // Store the answer.
- initial_gap_ptr = j;
- initial_gapsum_value = gapsum_j + j;
- }
-
-
- static void compute_gapsum_for_chunk_group(long group_beg, long group_end, long max_chunk_size,
- long *sparse_gapsum, const buffered_gap_array *gap) {
- for (long chunk_id = group_beg; chunk_id < group_end; ++chunk_id) {
- long chunk_beg = chunk_id * max_chunk_size;
- long chunk_end = std::min(chunk_beg + max_chunk_size, gap->m_length);
-
- // Compute sum of gap values inside chunk. We assume that
- // the excess values are in RAM and were sorted.
- long occ = std::upper_bound(gap->m_sorted_excess, gap->m_sorted_excess + gap->m_total_excess, chunk_end - 1)
- - std::lower_bound(gap->m_sorted_excess, gap->m_sorted_excess + gap->m_total_excess, chunk_beg);
- long gap_sum_inside_chunk = 256L * std::max(0L, occ);
- for (long j = chunk_beg; j < chunk_end; ++j)
- gap_sum_inside_chunk += gap->m_count[j];
-
- // Store the result.
- sparse_gapsum[chunk_id] = gap_sum_inside_chunk;
- }
- }
-
- bitvector* convert_to_bitvector(long max_threads) {
- // 1
- //
- // The term chunks is used to compute sparse gapsum array.
- // Chunk is a length such that
- // gapsum[k] = gap[0] + gap[1] + .. + gap[k * max_chunk_size - 1]
- long max_chunk_size = std::min(4L << 20, (m_length + max_threads - 1) / max_threads);
- long n_chunks = (m_length + max_chunk_size - 1) / max_chunk_size;
- long *sparse_gapsum = (long *)malloc(n_chunks * sizeof(long));
-
-
- // 2
- //
- // Compute the sum of gap value inside each chunk. Since there can be
- // more chunks than threads, we split chunks into groups and let each
- // thread compute the sum of gap values inside the group of chunks.
- long chunk_group_size = (n_chunks + max_threads - 1) / max_threads;
- long n_chunk_groups = (n_chunks + chunk_group_size - 1) / chunk_group_size;
-
- start_sequential_access();
- std::thread **threads = new std::thread*[n_chunk_groups];
- for (long t = 0; t < n_chunk_groups; ++t) {
- long chunk_group_beg = t * chunk_group_size;
- long chunk_group_end = std::min(chunk_group_beg + chunk_group_size, n_chunks);
-
- threads[t] = new std::thread(compute_gapsum_for_chunk_group, chunk_group_beg,
- chunk_group_end, max_chunk_size, sparse_gapsum, this);
- }
-
- for (long t = 0; t < n_chunk_groups; ++t) threads[t]->join();
- for (long t = 0; t < n_chunk_groups; ++t) delete threads[t];
- delete[] threads;
-
-
- // 3
- //
- // Compute comulative sum over sparse_gapsum array.
- long double gap_total_sum = 0L;
- for (long i = 0L; i < n_chunks; ++i) {
- long temp = sparse_gapsum[i];
- sparse_gapsum[i] = gap_total_sum;
- gap_total_sum += temp;
- }
-
-
- // 4
- //
- // Compute all initial gap pointers. For a thread handling range [beg..end), the
- // initial_gap_ptr values is the largest j, such that gapsum[j] + j <= beg.
- // After we find j, we store the value of gapsum[j] + j in initial_gapsum_value.
- long result_length = (m_length + gap_total_sum) - 1;
- bitvector *result = new bitvector(result_length + 1); // +1 is to make room for sentinel
-
- long max_range_size = (result_length + max_threads - 1) / max_threads;
- while (max_range_size & 7) ++max_range_size;
- long n_ranges = (result_length + max_range_size - 1) / max_range_size;
-
- long *initial_gap_ptr = new long[n_ranges];
- long *initial_gapsum_value = new long[n_ranges];
-
- threads = new std::thread*[n_ranges];
- for (long t = 0; t < n_ranges; ++t) {
- long range_beg = t * max_range_size;
- threads[t] = new std::thread(compute_j_aux, range_beg, n_chunks, max_chunk_size,
- sparse_gapsum, std::ref(initial_gap_ptr[t]), std::ref(initial_gapsum_value[t]), this);
- }
- for (long t = 0; t < n_ranges; ++t) threads[t]->join();
- for (long t = 0; t < n_ranges; ++t) delete threads[t];
-
-
- // 5
- //
- // Compute the bitvector. Each thread fills in the range of bits.
- for (long t = 0; t < n_ranges; ++t) {
- long range_beg = t * max_range_size;
- long range_end = std::min(range_beg + max_range_size, result_length);
-
- threads[t] = new std::thread(convert_gap_to_bitvector_aux, range_beg,
- range_end, initial_gap_ptr[t], initial_gapsum_value[t], this, result);
- }
-
- for (long t = 0; t < n_ranges; ++t) threads[t]->join();
- for (long t = 0; t < n_ranges; ++t) delete threads[t];
- delete[] threads;
-
- delete[] initial_gap_ptr;
- delete[] initial_gapsum_value;
- stop_sequential_access();
- free(sparse_gapsum);
-
- return result;
- }
-
- static const long k_excess_limit = (1L << 22);
-
- unsigned char *m_count;
- long m_length;
- long m_excess_filled;
- long m_excess_disk;
- long *m_excess;
-
- std::string m_storage_filename;
-
- bool m_sequential_read_initialized;
- long m_excess_ptr;
- long m_current_pos;
-
-public:
- long *m_sorted_excess;
- long m_total_excess;
-};
-
-
-struct gap_array_2n {
- gap_array_2n(const buffered_gap_array *gap, long max_threads) {
- m_length = gap->m_length;
- m_count = (uint16_t *)malloc(m_length * sizeof(uint16_t));
- parallel_utils::parallel_copy<unsigned char, uint16_t>(gap->m_count, m_count, m_length, max_threads);
- m_storage_filename = gap->m_storage_filename;
- m_excess_disk = gap->m_excess_disk;
- }
-
- gap_array_2n(long length) {
- m_length = length;
- m_count = (uint16_t *)malloc(m_length * sizeof(uint16_t));
- }
-
- ~gap_array_2n() {
- if (m_count)
- free(m_count);
- }
-
- static void apply_excess_aux(gap_array_2n *gap, const long *tab,
- long block_beg, long block_end, uint64_t &initial_run_length) {
- long block_size = block_end - block_beg;
-
- // Each thread gathers excess values in a buffer and at the end
- // copies then to the gap array's mutex-protected m_excess vector.
- std::vector<long> excess_buffer;
-
- // Compute the length of initial run.
- initial_run_length = 1UL;
- while (initial_run_length < (uint64_t)block_size && tab[block_beg] ==
- tab[block_beg + initial_run_length]) ++initial_run_length;
-
- // Update count values.
- for (long i = block_beg + initial_run_length; i < block_end; ++i) {
- long x = tab[i];
- uint64_t value = (uint64_t)gap->m_count[x] + 256UL;
- if (value >= (1UL << 16)) {
- value -= (1UL << 16);
- excess_buffer.push_back(x);
- }
- gap->m_count[x] = value;
- }
-
- // Copy the excess values to the gap array's mutex-protected vector.
- std::unique_lock<std::mutex> lk(gap->m_excess_mutex);
- for (long i = 0; i < (long)excess_buffer.size(); ++i)
- gap->m_excess.push_back(excess_buffer[i]);
- lk.unlock();
- }
-
- void apply_excess_from_disk(long ram_budget, long max_threads) {
- if (!m_excess_disk) return;
-
- // We only use half of the RAM for buffer, because we will use parallel
- // merge sort for sorting the buffer (which requires double the space
- // for the input).
- long elems = std::max(1L, ram_budget / (2L * (long)sizeof(long)));
- long *buffer = (long *)malloc(elems * sizeof(long));
-
- std::FILE *f = utils::open_file(m_storage_filename.c_str(), "r");
- std::thread **threads = new std::thread*[max_threads];
-
- // After sorting the buffer, when we split it equally between threads
- // we obey the rule, the every thread only counts the number of
- // elements equal to the first element in the handled range, but does
- // not do any updates for these elements. This prevents two threads
- // trying to update the same elements in the m_count array. The
- // length of the first run is computed and returned by each thread.
- // It is then updated sequentially.
- uint64_t *first_run_length = new uint64_t[max_threads];
-
- while (m_excess_disk > 0) {
- // Read a portion of excess values from disk.
- long toread = std::min(m_excess_disk, elems);
- utils::read_n_objects_from_file(buffer, toread, f);
-
- // Sort excess values in parallel.
- __gnu_parallel::sort(buffer, buffer + toread);
-
- // Update m_count and m_excess with elements from the buffer.
- // The buffer is dividied into blocks, each blocks handles one
- // block. Each thread updates the values except the first run
- // of the block, which is handled separatelly (sequentially).
- long max_block_size = (toread + max_threads - 1) / max_threads;
- long n_blocks = (toread + max_block_size - 1) / max_block_size;
-
- for (long t = 0; t < n_blocks; ++t) {
- long block_beg = t * max_block_size;
- long block_end = std::min(block_beg + max_block_size, toread);
-
- threads[t] = new std::thread(apply_excess_aux, this, buffer,
- block_beg, block_end, std::ref(first_run_length[t]));
- }
-
- for (long t = 0; t < n_blocks; ++t) threads[t]->join();
- for (long t = 0; t < n_blocks; ++t) delete threads[t];
-
- // Sequentially handle the elements in the first run of each block.
- for (long t = 0; t < n_blocks; ++t) {
- long block_beg = t * max_block_size;
- long first = buffer[block_beg]; // first elements in the block
-
- uint64_t freq = (uint64_t)m_count[first] + (first_run_length[t] * 256L);
- while (freq >= (1UL << 16)) {
- freq -= (1UL << 16);
- m_excess.push_back(first);
- }
- m_count[first] = freq;
- }
-
- m_excess_disk -= toread;
- }
-
- __gnu_parallel::sort(m_excess.begin(), m_excess.end());
-
- delete[] threads;
- delete[] first_run_length;
-
- std::fclose(f);
- free(buffer);
- }
-
- void set_count(long pos, long value) {
- while (value >= (1L << 16)) {
- m_excess.push_back(pos);
- value -= (1L << 16);
- }
- m_count[pos] = (uint64_t)value;
- }
-
- void erase_disk_excess() {
- if (utils::file_exists(m_storage_filename))
- utils::file_delete(m_storage_filename);
- }
-
- uint16_t *m_count;
-
- long m_length;
- long m_excess_disk;
-
- std::mutex m_excess_mutex;
- std::string m_storage_filename;
- std::vector<long> m_excess; // all excess values are in RAM
-};
-
-} // psascan_private
-
-#endif // __PSASCAN_SRC_GAP_ARRAY_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/gap_buffer.h b/exttools/pSAscan-0.1.0/src/psascan_src/gap_buffer.h
deleted file mode 100644
index 053ff715..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/gap_buffer.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * @file src/psascan_src/gap_buffer.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_GAP_BUFFER_H_INCLUDED
-#define __PSASCAN_SRC_GAP_BUFFER_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <queue>
-#include <mutex>
-#include <condition_variable>
-
-
-namespace psascan_private {
-
-template<typename value_type>
-struct gap_buffer {
- gap_buffer(long size_bytes, long n_increasers)
- : m_filled(0L),
- m_size(size_bytes / sizeof(value_type)) {
- m_content = new value_type[m_size];
-
- sblock_size = new long[n_increasers];
- sblock_beg = new long[n_increasers];
- }
-
- ~gap_buffer() {
- delete[] m_content;
- delete[] sblock_size;
- delete[] sblock_beg;
- }
-
- long m_filled, m_size;
- value_type *m_content;
-
- long *sblock_size;
- long *sblock_beg;
-};
-
-// Same class for the poll of empty and full gap buffers.
-template<typename value_type>
-struct gap_buffer_poll {
- typedef gap_buffer<value_type> gap_buffer_type;
-
- gap_buffer_poll(long worker_threads = 0L) {
- m_worker_threads = worker_threads; // unused for the poll of empty buffers.
- m_worker_threads_finished = 0L;
- }
-
- void add(gap_buffer_type *b) {
- m_queue.push(b);
- }
-
- bool available() const {
- return m_queue.size() > 0;
- }
-
- gap_buffer_type *get() {
- if (m_queue.empty()) {
- fprintf(stderr, "\nError: requesting a gap buffer from empty poll!\n");
- std::exit(EXIT_FAILURE);
- }
-
- gap_buffer_type *ret = m_queue.front();
- m_queue.pop();
-
- return ret;
- }
-
- bool finished() const {
- return m_worker_threads_finished == m_worker_threads;
- }
-
- void increment_finished_workers() {
- ++m_worker_threads_finished;
- }
-
- std::condition_variable m_cv;
- std::mutex m_mutex;
-
-private:
- long m_worker_threads;
- long m_worker_threads_finished; // to detect when all threads finished
-
- std::queue<gap_buffer_type*> m_queue;
-};
-
-} // psascan_private
-
-#endif // __PSASCAN_SRC_GAP_BUFFER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/half_block_info.h b/exttools/pSAscan-0.1.0/src/psascan_src/half_block_info.h
deleted file mode 100644
index 6ab7842e..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/half_block_info.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * @file src/psascan_src/half_block_info.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_HALF_BLOCK_INFO_H_INCLUDED
-#define __PSASCAN_SRC_HALF_BLOCK_INFO_H_INCLUDED
-
-#include <string>
-
-#include "distributed_file.h"
-
-
-namespace psascan_private {
-
-// Stores the information about half-blocks.
-template<typename block_offset_type>
-struct half_block_info {
- long beg;
- long end;
-
- std::string gap_filename;
- distributed_file<block_offset_type> *psa;
-
- bool operator < (const half_block_info &i) const {
- return beg < i.beg;
- }
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_HALF_BLOCK_INFO_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/bwtsa.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/bwtsa.h
deleted file mode 100644
index 7ef48c1f..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/bwtsa.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/bwtsa.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_BWTSA_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_BWTSA_H_INCLUDED
-
-#include "../uint40.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename sa_type>
-struct bwtsa_t {
- sa_type sa;
- unsigned char bwt;
-
- inline operator sa_type() const {
- return sa;
- }
-
- bwtsa_t() {
- }
-
- bwtsa_t(long x) {
- sa = (sa_type)x;
- }
-
- bwtsa_t(int x) {
- sa = (sa_type)x;
- }
-
- bwtsa_t(uint40 x) {
- sa = (sa_type)x;
- }
-
-} __attribute__((packed));
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_BWTSA_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/change_gt_reference_point.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/change_gt_reference_point.h
deleted file mode 100644
index bf13cde7..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/change_gt_reference_point.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/change_gt_reference_point.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * In-place computation of gt_begin bitvector from gt_end bitvector
- * (reversed). The procedure uses the string range matching algorithm
- * described in
- *
- * Juha Karkkainen, Dominik Kempa, Simon J. Puglisi:
- * String Range Matching.
- * In Proc. CPM 2014, p. 232-241.
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_CHANGE_GT_REFERENCE_POINT_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_CHANGE_GT_REFERENCE_POINT_H_INCLUDED
-
-#include <cstring>
-#include <algorithm>
-#include <thread>
-
-#include "../bitvector.h"
-#include "srank_aux.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-//==============================================================================
-// Compute range [microblock_beg..microblock_end) of bits in the output
-// bitvector gt_out.
-//==============================================================================
-void gt_end_to_gt_begin_aux(const unsigned char *text, long text_length,
- long block_beg, long block_end, bitvector *gt) {
- long block_size = block_end - block_beg;
- const unsigned char *pat = text + block_beg, *txt = pat;
-
- long i = 1, el = 0L, s = 0L, p = 0L;
- long i_max = i, el_max = 0L, s_max = 0L, p_max = 0L;
-
- long rev_end = text_length - block_beg;
- while (i < block_size) {
- // Compute lcp(text[left_block_beg..), text[left_block_beg+i..),
- // but compare not more than left_block_size symbols (we have gt
- // to resolve the long comparisons).
- while (block_beg + i + el < block_end && txt[i + el] == pat[el])
- update_ms(pat, ++el, s, p);
-
- if (((block_beg + i + el != block_end && txt[i + el] > pat[el]) ||
- (block_beg + i + el == block_end && !gt->get(rev_end - i))))
- gt->set(rev_end - i);
- else gt->reset(rev_end - i);
-
- long j = i_max;
- if (el > el_max) {
- std::swap(el, el_max);
- std::swap(s, s_max);
- std::swap(p, p_max);
- i_max = i;
- }
-
- if (el < 100) {
- ++i;
- el = 0;
- } else if (p > 0L && (p << 2) <= el && !memcmp(pat, pat + p, s)) {
- long maxk = std::min(block_size - i, p);
- for (long k = 1L; k < maxk; ++k) {
- if (gt->get(rev_end - (j + k))) gt->set(rev_end - (i + k));
- else gt->reset(rev_end - (i + k));
- }
-
- i += p;
- el -= p;
- } else {
- long h = (el >> 2) + 1L;
- long maxk = std::min(h, block_size - i);
- for (long k = 1L; k < maxk; ++k) {
- if (gt->get(rev_end - (j + k))) gt->set(rev_end - (i + k));
- else gt->reset(rev_end - (i + k));
- }
-
- i += h;
- el = 0;
- s = 0;
- p = 0;
- }
- }
-}
-
-
-//==============================================================================
-// Change gt_end bitvector into gt_begin using string range matching.
-//==============================================================================
-void gt_end_to_gt_begin(const unsigned char *text, long text_length,
- bitvector *gt, long max_block_size) {
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
-
- //----------------------------------------------------------------------------
- // STEP 1: Compute the last bit in every block.
- //----------------------------------------------------------------------------
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long rev_beg = text_length - block_end;
- gt->flip(rev_beg);
- }
-
- //----------------------------------------------------------------------------
- // STEP 2: compute remaining bits in every block.
- //----------------------------------------------------------------------------
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
-
- threads[i] = new std::thread(gt_end_to_gt_begin_aux,
- text, text_length, block_beg, block_end, gt);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_CHANGE_GT_REFERENCE_POINT_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h
deleted file mode 100644
index 54ca636a..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h
+++ /dev/null
@@ -1,361 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/compute_initial_gt_bitvectors.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * Parallel computation of gt_end bitvectors. The procedure uses the
- * string range matching algorithm described in
- *
- * Juha Karkkainen, Dominik Kempa, Simon J. Puglisi:
- * String Range Matching.
- * In Proc. CPM 2014, p. 232-241.
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_COMPUTE_INITIAL_GT_BITVECTORS_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_COMPUTE_INITIAL_GT_BITVECTORS_H_INCLUDED
-
-#include <cstdio>
-#include <cstring>
-#include <algorithm>
-#include <thread>
-
-#include "../bitvector.h"
-#include "../multifile.h"
-#include "../multifile_bit_stream_reader.h"
-#include "../background_block_reader.h"
-#include "srank_aux.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-void compute_partial_gt_end(const unsigned char *text, long text_length,
- long begin, long end, long max_lcp, bitvector *gt, bitvector *undecided,
- bool &all_decided, long text_end, long supertext_length,
- const multifile *tail_gt_begin_rev,
- background_block_reader *tail_prefix_background_reader,
- const unsigned char *tail_prefix_preread) {
- bool res = true;
- all_decided = true;
- long revbeg = text_length - end;
-
- if (end == text_length) {
- // It's ok if tail_gt_begin_rev is NULL
- multifile_bit_stream_reader tail_gt_beg_rev(tail_gt_begin_rev);
- long tail_length = supertext_length - text_end;
- long range_size = end - begin;
- long tail_prefix_length = std::min(text_length, tail_length);
- long tail_prefix_fetched = 0;
-
- const unsigned char *txt = text + begin;
- const unsigned char *tail_prefix = NULL;
-
- if (tail_prefix_length > 0) {
- if (tail_prefix_preread != NULL) {
- // Whole tail prefix is already in memory.
- tail_prefix = tail_prefix_preread;
- tail_prefix_fetched = tail_prefix_length;
- } else {
- // Tail prefix will be fetched asynchronously in the background.
- tail_prefix = tail_prefix_background_reader->m_data;
- tail_prefix_fetched = 0;
- }
- }
-
- long i = 0, el = 0, s = 0, p = 0;
- long i_max = 0, el_max = 0, s_max = 0, p_max = 0;
-
- static const long chunk_size = (1L << 20);
-
- while (i < range_size) {
- while (i + el < range_size && el < tail_length) {
- if (el == tail_prefix_fetched) {
- long next_chunk = std::min(chunk_size,
- tail_prefix_length - tail_prefix_fetched);
- tail_prefix_fetched += next_chunk;
- tail_prefix_background_reader->wait(tail_prefix_fetched);
- }
- while (i + el < range_size && el < tail_length &&
- el < tail_prefix_fetched && txt[i + el] == tail_prefix[el])
- update_ms(tail_prefix, ++el, s, p);
- if (el < tail_prefix_fetched) break;
- }
-
- if ((el == tail_length) ||
- (i + el == range_size && !tail_gt_beg_rev.access(tail_length - el)) ||
- (i + el < range_size && txt[i + el] > tail_prefix[el]))
- gt->set(revbeg + i);
-
- long j = i_max;
- if (el > el_max) {
- std::swap(el, el_max);
- std::swap(s, s_max);
- std::swap(p, p_max);
- i_max = i;
- }
-
- if (el < 100) {
- ++i;
- el = 0;
- } else if (p > 0 && (p << 2) <= el &&
- !memcmp(tail_prefix, tail_prefix + p, s)) {
- long maxk = std::min(p, range_size - i);
- for (long k = 1; k < maxk; ++k)
- if (gt->get(revbeg + j + k)) gt->set(revbeg + i + k);
- i += p;
- el -= p;
- } else {
- long h = (el >> 2) + 1;
- long maxk = std::min(h, range_size - i);
- for (long k = 1; k < maxk; ++k)
- if (gt->get(revbeg + j + k)) gt->set(revbeg + i + k);
- i += h;
- el = 0;
- p = 0;
- s = 0;
- }
- }
- } else {
- long i = 0, el = 0, s = 0, p = 0;
- long i_max = 0, el_max = 0, s_max = 0, p_max = 0;
-
- const unsigned char *txt = text + begin;
- const unsigned char *pat = text + end;
- long range_size = end - begin;
-
- while (i < range_size) {
- while (el < max_lcp && txt[i + el] == pat[el])
- update_ms(pat, ++el, s, p);
-
- if (el < max_lcp) {
- if (txt[i + el] > pat[el]) gt->set(revbeg + i);
- } else {
- undecided->set(revbeg + i);
- res = false;
- }
-
- long j = i_max;
- if (el > el_max) {
- std::swap(el, el_max);
- std::swap(s, s_max);
- std::swap(p, p_max);
- i_max = i;
- }
-
- if (el < 100) {
- ++i;
- el = 0;
- } else if (p > 0 && (p << 2) <= el && !memcmp(pat, pat + p, s)) {
- long maxk = std::min(p, range_size - i);
- for (long k = 1; k < maxk; ++k) {
- if (undecided->get(revbeg + (j + k))) undecided->set(revbeg + (i + k));
- if (gt->get(revbeg + (j + k))) gt->set(revbeg + (i + k));
- }
-
- i += p;
- el -= p;
- } else {
- long h = (el >> 2) + 1;
- long maxk = std::min(h, range_size - i);
- for (long k = 1; k < maxk; ++k) {
- if (undecided->get(revbeg + (j + k))) undecided->set(revbeg + (i + k));
- if (gt->get(revbeg + (j + k))) gt->set(revbeg + (i + k));
- }
-
- i += h;
- el = 0;
- s = 0;
- p = 0;
- }
- }
- }
-
- all_decided = res;
-}
-
-//==============================================================================
-// Set all undecided bits inside the given microblock (that is, the range
-// [mb_beg..mb_end)) of all gt bitvectors to their correct values.
-//==============================================================================
-void compute_final_gt(long text_length, long max_block_size, long mb_beg,
- long mb_end, bitvector *gt, const bitvector *undecided,
- const bool *all_decided) {
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
-
- // Go through blocks right to left.
- for (long t = n_blocks - 2; t >= 0; --t) {
- long block_end = text_length - (n_blocks - 1 - t) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long this_block_size = block_end - block_beg;
- long this_mb_beg = mb_beg;
- long this_mb_end = std::min(mb_end, this_block_size);
-
- long rev_beg = text_length - block_end;
- long rev_end = text_length - block_beg;
-
- if (!all_decided[t]) {
- // This eliminates the problem with accessing bits located in the same
- // byte in the bitvector. Skipped bits are later updated sequentially.
- while (((rev_end - 1 - this_mb_beg) & 7) != 7) ++this_mb_beg;
- for (long j = this_mb_beg; j < this_mb_end; ++j)
- if (undecided->get(rev_end - 1 - j) && gt->get(rev_beg - 1 - j))
- gt->set(rev_end - 1 - j);
- }
- }
-}
-
-//==============================================================================
-// Update the bits omitted in compute_final_gt.
-//==============================================================================
-void compute_final_gt_last_bits(long text_length, long max_block_size,
- long mb_beg, long mb_end, bitvector *gt, const bitvector *undecided,
- bool *all_decided) {
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
- if (!all_decided[0]) {
- long block_end = text_length - (n_blocks - 1) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long this_block_size = block_end - block_beg;
- long this_mb_beg = mb_beg;
- long this_mb_end = std::min(mb_end, this_block_size);
-
- long rev_beg = text_length - block_end;
- long rev_end = text_length - block_beg;
-
- long temp_this_mb_beg = this_mb_beg;
- while (((rev_end - 1 - temp_this_mb_beg) & 7) != 7) ++temp_this_mb_beg;
- this_mb_end = temp_this_mb_beg;
-
- // [this_mb_beg..this_mb_end) were omitted.
- for (long j = this_mb_beg; j < this_mb_end; ++j)
- if (undecided->get(rev_end - 1 - j) && gt->get(rev_beg - 1 - j))
- gt->set(rev_end - 1 - j);
- }
-}
-
-//==============================================================================
-// Fully parallel computation of gt bitvectors.
-//==============================================================================
-void compute_initial_gt_bitvectors(const unsigned char *text, long text_length,
- bitvector *gt, long max_block_size, long max_threads, long text_end,
- long supertext_length, const multifile *tail_gt_begin_reversed,
- background_block_reader *tail_prefix_background_reader,
- const unsigned char *tail_prefix_preread) {
- long double start;
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
-
- //----------------------------------------------------------------------------
- // STEP 1: compute gt bitvectors, some bits may still be undecided after this.
- //----------------------------------------------------------------------------
-
- // Allocate ane zero-initialize (in parallel) bitvectors.
- fprintf(stderr, " Allocating: ");
- start = utils::wclock();
- bitvector *undecided = new bitvector(text_length);
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- // all_decided[i] == true, if all bits inside block i were
- // decided in the first stage. This can be used by threads in the
- // second stage to completely skip inspecting some blocks.
- bool *all_decided = new bool[n_blocks];
-
- // Process blocks right-to-left.
- fprintf(stderr, " Computing decided bits: ");
- start = utils::wclock();
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
-
- // Compute bitvectors 'gt' and 'undecided' for block i.
- threads[i] = new std::thread(compute_partial_gt_end,
- text, text_length, block_beg, block_end, max_block_size, gt,
- undecided, std::ref(all_decided[i]), text_end, supertext_length,
- tail_gt_begin_reversed, tail_prefix_background_reader,
- tail_prefix_preread);
- }
-
- // Wait for the threads to finish and clean up.
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- //----------------------------------------------------------------------------
- // STEP 2: compute the undecided bits in the gt bitvectors.
- //----------------------------------------------------------------------------
-
- // The size of micro block has to be a multiple of 8, otherwise two
- // threads might try to update the same char inside bitvector.
- long max_microblock_size = (max_block_size + max_threads - 1) / max_threads;
- while ((max_microblock_size & 7) && max_microblock_size < max_block_size)
- ++max_microblock_size;
- long n_microblocks = (max_block_size + max_microblock_size - 1) / max_microblock_size;
-
- fprintf(stderr, " Computing undecided bits: ");
- start = utils::wclock();
- threads = new std::thread*[n_microblocks];
- for (long i = 0; i < n_microblocks; ++i) {
- long mb_beg = i * max_microblock_size;
- long mb_end = std::min(mb_beg + max_microblock_size, max_block_size);
-
- threads[i] = new std::thread(compute_final_gt, text_length, max_block_size,
- mb_beg, mb_end, std::ref(gt), std::ref(undecided), all_decided);
- }
-
- // Wait for the threads to finish and clean up.
- for (long i = 0; i < n_microblocks; ++i) threads[i]->join();
- for (long i = 0; i < n_microblocks; ++i) delete threads[i];
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- // Fill in the skipped (due to parallel byte access issue) undecided bits.
- for (long i = 0; i < n_microblocks; ++i) {
- long mb_beg = i * max_microblock_size;
- long mb_end = std::min(mb_beg + max_microblock_size, max_block_size);
-
- compute_final_gt_last_bits(text_length, max_block_size, mb_beg, mb_end,
- gt, undecided, all_decided);
- }
-
- fprintf(stderr, " Deallocating: ");
- start = utils::wclock();
- delete[] threads;
- delete undecided;
- delete[] all_decided;
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_COMPUTE_INITIAL_GT_BITVECTORS_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/divsufsort_template.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/divsufsort_template.h
deleted file mode 100644
index 86c5c903..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/divsufsort_template.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/divsufsort_template.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_DIVSUFSORT_TEMPLATE_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_DIVSUFSORT_TEMPLATE_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-
-#include "divsufsort.h"
-#include "divsufsort64.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename T>
-void run_divsufsort(const unsigned char *, T*, T) {
- fprintf(stderr, "\ndivsufsort: non-standard call. Use either"
- "int or long for second and third argument.\n");
- std::exit(EXIT_FAILURE);
-}
-
-template<>
-void run_divsufsort(const unsigned char *text, int *sa, int length) {
- divsufsort(text, sa, length);
-}
-
-template<>
-void run_divsufsort(const unsigned char *text, long *sa, long length) {
- divsufsort64(text, sa, length);
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_DIVSUFSORT_TEMPLATE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h
deleted file mode 100644
index b298f9d2..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/initial_partial_sufsort.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INITIAL_PARTIAL_SUFSORT_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INITIAL_PARTIAL_SUFSORT_H_INCLUDED
-
-#include <algorithm>
-#include <thread>
-
-#include "../bitvector.h"
-#include "divsufsort_template.h"
-#include "bwtsa.h"
-#include "parallel_shrink.h"
-#include "parallel_expand.h"
-#include "parallel_copy.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-//==============================================================================
-// Rename the given block using its gt bitvector.
-//==============================================================================
-void rename_block(unsigned char *text, long text_length, long block_beg,
- long block_length, bitvector *gt, bool &renaming_error) {
- long block_end = block_beg + block_length;
- long beg_rev = text_length - block_end;
- unsigned char *block = text + block_beg;
- unsigned char last = block[block_length - 1];
- bool err = false;
- for (long i = 0; i + 1 < block_length; ++i)
- if (block[i] > last || (block[i] == last && gt->get(beg_rev + i + 1))) {
- if (block[i] == 255)
- err = true;
- ++block[i];
- }
- if (block[block_length - 1] == 255)
- err = true;
- ++block[block_length - 1];
-
- if (err)
- renaming_error = true;
-}
-
-
-//==============================================================================
-// Re-rename block back to original.
-//==============================================================================
-void rerename_block(unsigned char *block, long block_length) {
- unsigned char last = block[block_length - 1] - 1;
- for (long i = 0; i < block_length; ++i)
- if (block[i] > last) --block[i];
-}
-
-
-//==============================================================================
-// Given gt bitvectors, compute partial suffix arrays of blocks.
-//==============================================================================
-template<typename saidx_t>
-void initial_partial_sufsort(unsigned char *, long, bitvector *,
- bwtsa_t<saidx_t> *, long, long, bool) {
- fprintf(stderr, "Error: initial_partial_sufsort: given saidx_t is "
- "not supported, sizeof(saidx_t) = %ld\n", (long)sizeof(saidx_t));
- std::exit(EXIT_FAILURE);
-}
-
-template<>
-void initial_partial_sufsort(unsigned char *text, long text_length,
- bitvector* gt, bwtsa_t<uint40> *bwtsa, long max_block_size,
- long max_threads, bool has_tail) {
- long double start = utils::wclock();
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
-
- //----------------------------------------------------------------------------
- // STEP 1: Rename the blocks in parallel.
- //----------------------------------------------------------------------------
-
- if (n_blocks > 1 || has_tail) {
- fprintf(stderr, " Renaming blocks: ");
- start = utils::wclock();
- bool *renaming_error = new bool[n_blocks];
- std::fill(renaming_error, renaming_error + n_blocks, false);
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(rename_block, text, text_length, block_beg,
- block_size, gt, std::ref(renaming_error[i]));
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- bool err = false;
- for (long i = 0; i < n_blocks; ++i)
- if (renaming_error[i]) err = true;
- delete[] renaming_error;
-
- if (err) {
- fprintf(stdout, "\n\nError: byte with value 255 was detected in the input text!\n"
- "See the section on limitations in the README for more information.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
- }
-
- if (max_block_size >= (2L << 30)) { // Use 64-bit divsufsort.
- fprintf(stdout, "\nError: 2GiB+ partial suffix arrays are not "
- "yet supported by the internal-memory pSAscan.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- } else { // Use 32-bit divsufsort.
- int *temp_sa = (int *)bwtsa;
-
- //--------------------------------------------------------------------------
- // STEP 2: Compute suffix arrays in parallel.
- //--------------------------------------------------------------------------
- fprintf(stderr, " Running divsufsort32 in parallel: ");
- start = utils::wclock();
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(run_divsufsort<int>,
- text + block_beg, temp_sa + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- fprintf(stderr, " Expanding 32-bit integers to bwtsa objects: ");
- start = utils::wclock();
- parallel_expand<int, bwtsa_t<uint40> >(temp_sa, text_length, max_threads);
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
- }
-
- //----------------------------------------------------------------------------
- // STEP 3: Restore the original text.
- //----------------------------------------------------------------------------
- if (n_blocks > 1 || has_tail) {
- fprintf(stderr, " Rerenaming blocks: ");
- start = utils::wclock();
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(rerename_block,
- text + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
- }
-}
-
-template<>
-void initial_partial_sufsort(unsigned char *text, long text_length,
- bitvector* gt, bwtsa_t<int> *bwtsa, long max_block_size, long max_threads,
- bool has_tail) {
- long double start = utils::wclock();
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
-
- //----------------------------------------------------------------------------
- // STEP 1: Rename the blocks in parallel.
- //----------------------------------------------------------------------------
- if (n_blocks > 1 || has_tail) {
- fprintf(stderr, " Renaming blocks: ");
- start = utils::wclock();
- bool *renaming_error = new bool[n_blocks];
- std::fill(renaming_error, renaming_error + n_blocks, false);
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(rename_block, text, text_length, block_beg,
- block_size, gt, std::ref(renaming_error[i]));
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- bool err = false;
- for (long i = 0; i < n_blocks; ++i)
- if (renaming_error[i]) err = true;
- delete[] renaming_error;
-
- if (err) {
- fprintf(stdout, "\n\nError: byte with value 255 was detected in the input text!\n"
- "See the section on limitations in the README for more information.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
- }
-
- int *temp_sa = (int *)bwtsa;
-
- //----------------------------------------------------------------------------
- // STEP 2: Compute suffix arrays in parallel.
- //----------------------------------------------------------------------------
- fprintf(stderr, " Running divsufsort32 in parallel: ");
- start = utils::wclock();
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(run_divsufsort<int>,
- text + block_beg, temp_sa + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- fprintf(stderr, " Expanding 32-bit integers to bwtsa objects: ");
- start = utils::wclock();
- parallel_expand<int, bwtsa_t<int> >(temp_sa, text_length, max_threads);
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- //----------------------------------------------------------------------------
- // STEP 3: Restore the original text.
- //----------------------------------------------------------------------------
- if (n_blocks > 1 || has_tail) {
- fprintf(stderr, " Rerenaming blocks: ");
- start = utils::wclock();
- threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_end = text_length - (n_blocks - 1 - i) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(rerename_block,
- text + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
- }
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INITIAL_PARTIAL_SUFSORT_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h
deleted file mode 100644
index c88901f8..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_bwt_from_sa.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWT_FROM_SA_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWT_FROM_SA_H_INCLUDED
-
-#include <algorithm>
-#include <thread>
-
-#include "../utils.h"
-#include "bwtsa.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename saidx_t>
-void compute_bwt_in_bwtsa_aux(const unsigned char *text, long beg,
- long end, bwtsa_t<saidx_t> *dest, long *i0) {
- *i0 = -1;
- for (long j = beg; j < end; ++j) {
- if (dest[j].sa) dest[j].bwt = text[dest[j].sa - 1];
- else { dest[j].bwt = 0; *i0 = j; }
- }
-}
-
-template<typename saidx_t>
-void compute_bwt_in_bwtsa(const unsigned char *text, long length,
- bwtsa_t<saidx_t> *dest, long max_threads, long &result) {
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_blocks = (length + max_block_size - 1) / max_block_size;
- long *index_0 = new long[n_blocks];
-
- // Compute bwt and find i0, where sa[i0] == 0.
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
-
- threads[i] = new std::thread(compute_bwt_in_bwtsa_aux<saidx_t>,
- text, block_beg, block_end, dest, index_0 + i);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- // Find and return i0.
- result = -1;
- for (long i = 0; i < n_blocks; ++i)
- if (index_0[i] != -1) result = index_0[i];
- delete[] index_0;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWT_FROM_SA_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h
deleted file mode 100644
index 108638a5..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_bwtsa_merge.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWTSA_MERGE_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWTSA_MERGE_H_INCLUDED
-
-#include <cstdio>
-#include <vector>
-#include <string>
-#include <algorithm>
-
-#include "../bitvector.h"
-#include "../multifile.h"
-#include "inmem_gap_array.h"
-#include "inmem_compute_gap.h"
-#include "parallel_merge.h"
-#include "pagearray.h"
-#include "bwtsa.h"
-#include "merge_schedule.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename saidx_t, unsigned pagesize_log>
-pagearray<bwtsa_t<saidx_t>, pagesize_log> *inmem_bwtsa_merge(
- const unsigned char *text,
- long text_length,
- bwtsa_t<saidx_t> *bwtsa,
- bitvector *gt,
- long max_block_size,
- long range_beg,
- long range_end,
- long max_threads,
- bool need_gt,
- bool need_bwt,
- long &result_i0,
- MergeSchedule &schedule,
- long text_beg,
- long text_end,
- long supertext_length,
- std::string supertext_filename,
- const multifile *tail_gt_begin_reversed,
- long *i0_array,
- long **block_rank_matrix) {
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_type;
-
- long shift = (max_block_size - text_length % max_block_size) % max_block_size;
- long range_size = range_end - range_beg;
-
- if (range_size == 1) {
- long block_beg = range_beg * max_block_size;
- long block_end = block_beg + max_block_size;
- block_beg = std::max(0L, block_beg - shift);
- block_end -= shift;
-
- result_i0 = i0_array[range_beg];
- pagearray_type *bwtsa_pagearray =
- new pagearray_type(bwtsa + block_beg, bwtsa + block_end);
- return bwtsa_pagearray;
- }
-
- //----------------------------------------------------------------------------
- // STEP 1: Split the blocks in the left and right group.
- //----------------------------------------------------------------------------
- long lrange_size = schedule.left_size(range_size);
- long rrange_size = range_size - lrange_size;
-
- long lrange_beg = range_beg;
- long lrange_end = range_beg + lrange_size;
- long rrange_beg = lrange_end;
- long rrange_end = rrange_beg + rrange_size;
-
- long lbeg = lrange_beg * max_block_size;
- long rbeg = rrange_beg * max_block_size;
- long lend = rbeg;
- long rend = rbeg + rrange_size * max_block_size;
- lbeg = std::max(0L, lbeg - shift);
- rbeg -= shift;
- lend -= shift;
- rend -= shift;
-
- long lsize = lend - lbeg;
- long rsize = rend - rbeg;
-
- //----------------------------------------------------------------------------
- // STEP 2: Compute partial SAs and BWTs for left and right block.
- //----------------------------------------------------------------------------
-
- // 2.a
- //
- // Left block
- long left_i0;
- pagearray_type *l_bwtsa = inmem_bwtsa_merge<saidx_t, pagesize_log>(text,
- text_length, bwtsa, gt, max_block_size, lrange_beg, lrange_end,
- max_threads, need_gt, true, left_i0, schedule, text_beg, text_end,
- supertext_length, supertext_filename, tail_gt_begin_reversed, i0_array,
- block_rank_matrix);
-
- // 2.b
- //
- // Right block
- long right_i0;
- pagearray_type *r_bwtsa = inmem_bwtsa_merge<saidx_t, pagesize_log>(text,
- text_length, bwtsa, gt, max_block_size, rrange_beg, rrange_end,
- max_threads, true, need_bwt, right_i0, schedule, text_beg, text_end,
- supertext_length, supertext_filename, tail_gt_begin_reversed, i0_array,
- block_rank_matrix);
-
- //----------------------------------------------------------------------------
- // STEP 3: Merge partial SAs and BWTs.
- //----------------------------------------------------------------------------
- fprintf(stderr, "Merging blocks %ld-%ld with %ld-%ld\n",
- lrange_beg + 1, lrange_end, rrange_beg + 1, rrange_end);
- long double start = utils::wclock();
-
- // 3.a
- //
- // Compute gap
- fprintf(stderr, " Computing gap:\n");
- inmem_gap_array *gap;
- long double rank_init_time;
- long double streaming_time;
- long double start1 = utils::wclock();
- inmem_compute_gap<saidx_t, pagesize_log>(text, text_length, lbeg, lsize,
- rsize, *l_bwtsa, gt, gap, max_threads, need_gt, left_i0, (1L << 21),
- rank_init_time, streaming_time, block_rank_matrix, lrange_beg,
- lrange_size, rrange_size);
- fprintf(stderr, " Time: %.2Lf\n", utils::wclock() - start1);
-
- // 3.b
- //
- // Merge partial SAs and BWTs
- fprintf(stderr, " Merging SA/BWT: ");
- start1 = utils::wclock();
- long delta_i0;
- if (need_bwt)
- (*r_bwtsa)[right_i0].bwt = text[rbeg - 1];
- pagearray_type *result = parallel_merge(l_bwtsa, r_bwtsa, gap,
- max_threads, left_i0, delta_i0, lsize);
- result_i0 = left_i0 + delta_i0;
- long double merging_time = utils::wclock() - start1;
- fprintf(stderr, "total: %.2Lf\n", merging_time);
-
- // 3.c
- //
- // Clean up.
- start1 = utils::wclock();
- delete l_bwtsa;
- delete r_bwtsa;
- delete gap;
- long double cleaning_time = utils::wclock() - start1;
- if (cleaning_time > 0.2L)
- fprintf(stderr, "Cleaning: %.2Lf\n", cleaning_time);
-
- long double time_per_elem_left = merging_time / (lsize + rsize) + rank_init_time / lsize;
- long double time_per_elem_right = merging_time / (lsize + rsize) + streaming_time / rsize;
- long double ratio = time_per_elem_right / time_per_elem_left;
- fprintf(stderr, "Time: %.2Lf (rl_ratio = %.3Lf)\n",
- utils::wclock() - start, ratio);
-
- return result;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_BWTSA_MERGE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_gap.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_gap.h
deleted file mode 100644
index 95a1b4e5..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_gap.h
+++ /dev/null
@@ -1,297 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_compute_gap.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_GAP_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_GAP_H_INCLUDED
-
-#include <cstdio>
-#include <map>
-#include <vector>
-#include <thread>
-#include <algorithm>
-
-#include "../bitvector.h"
-#include "../gap_buffer.h"
-#include "../multifile.h"
-#include "rank.h"
-#include "inmem_gap_array.h"
-#include "inmem_compute_initial_ranks.h"
-#include "inmem_stream.h"
-#include "inmem_update.h"
-#include "inmem_bwt_from_sa.h"
-#include "pagearray.h"
-#include "bwtsa.h"
-#include "sparse_isa.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename saidx_t, unsigned pagesize_log>
-void inmem_compute_gap(const unsigned char *text, long text_length, long left_block_beg,
- long left_block_size, long right_block_size,
- const pagearray<bwtsa_t<saidx_t>, pagesize_log> &bwtsa,
- bitvector *gt, inmem_gap_array* &gap, long max_threads, bool need_gt, long i0,
- long gap_buf_size, long double &rank_init_time, long double &streaming_time,
- long **block_rank_matrix, long lrange_beg, long lrange_size, long rrange_size) {
- long lrange_end = lrange_beg + lrange_size;
- long rrange_end = lrange_end + rrange_size;
-
- //----------------------------------------------------------------------------
- // STEP 1: build rank data structure over BWT.
- //----------------------------------------------------------------------------
- fprintf(stderr, " Building rank: ");
- long double start = utils::wclock();
- typedef rank4n<saidx_t, pagesize_log> rank_type;
- rank_type *rank = new rank_type(&bwtsa, left_block_size, max_threads);
- rank_init_time = utils::wclock() - start;
- fprintf(stderr, "total: %.2Lf\n", rank_init_time);
-
- //----------------------------------------------------------------------------
- // STEP 2: compute symbol counts and the last symbol of the left block.
- //----------------------------------------------------------------------------
- long *count = new long[256];
- const unsigned char *left_block = text + left_block_beg;
- std::copy(rank->m_count, rank->m_count + 256, count);
- unsigned char last = left_block[left_block_size - 1];
- ++count[last];
- --count[0];
- for (long i = 0, s = 0, t; i < 256; ++i)
- { t = count[i]; count[i] = s; s += t; }
-
- //----------------------------------------------------------------------------
- // STEP 3: compute starting positions for all streaming threads.
- //----------------------------------------------------------------------------
- long left_block_end = left_block_beg + left_block_size;
- long right_block_beg = left_block_end;
- long right_block_end = left_block_end + right_block_size;
-
- long max_stream_block_size = (right_block_size + max_threads - 1) / max_threads;
- while (max_stream_block_size & 7) ++max_stream_block_size;
- long n_threads = (right_block_size + max_stream_block_size - 1) / max_stream_block_size;
-
- fprintf(stderr, " Computing initial ranks: ");
- start = utils::wclock();
- std::vector<long> initial_ranks(n_threads);
- std::vector<std::pair<long, long> > initial_ranges(n_threads);
- std::thread **threads = new std::thread*[n_threads];
-
- // 3.a
- //
- // Compute the last starting position using the matrix of initial ranks.
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_bwtsa_type;
- long last_stream_block_beg = right_block_beg + (n_threads - 1) * max_stream_block_size;
- long last_stream_block_end = right_block_end;
-
- initial_ranks[n_threads - 1] = 0L;
- for (long j = lrange_beg; j < lrange_end; ++j)
- initial_ranks[n_threads - 1] += block_rank_matrix[j][rrange_end - 1];
-
- // 3.b
- //
- // Compute the starting position for all
- // starting positions other than the last one.
- long prev_stream_block_size = last_stream_block_end - last_stream_block_beg;
- for (long i = n_threads - 2; i >= 0; --i) {
- long stream_block_beg = right_block_beg + i * max_stream_block_size;
- long stream_block_end = std::min(stream_block_beg + max_stream_block_size, right_block_end);
- long stream_block_size = stream_block_end - stream_block_beg;
- const unsigned char *pat = text + stream_block_end;
-
- threads[i] = new std::thread(compute_range<pagearray_bwtsa_type>,
- text, left_block_beg, left_block_size, pat, prev_stream_block_size,
- std::ref(bwtsa), std::ref(initial_ranges[i]));
-
- prev_stream_block_size = stream_block_size;
- }
-
- for (long i = 0; i + 1 < n_threads; ++i) threads[i]->join();
- for (long i = 0; i + 1 < n_threads; ++i) delete threads[i];
- delete[] threads;
- fprintf(stderr, "%.2Lf ", utils::wclock() - start);
-
- bool nontrivial_range = false;
- for (long j = 0; j < n_threads - 1; ++j)
- if (initial_ranges[j].first != initial_ranges[j].second)
- nontrivial_range = true;
-
- if (nontrivial_range) {
- // 3.c
- //
- // Build the data structure allowing answering ISA queries.
- start = utils::wclock();
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_type;
- typedef sparse_isa<pagearray_type, rank_type, 12U> sparse_isa_type;
- sparse_isa_type *sp_isa = new sparse_isa_type(&bwtsa, text +
- left_block_beg, rank, left_block_size, i0, max_threads);
- fprintf(stderr, "%.3Lf ", utils::wclock() - start);
-
- // 3.d
- //
- // Narrow nontrivial ranges to single elements.
- start = utils::wclock();
- prev_stream_block_size = last_stream_block_end - last_stream_block_beg;
- long prev_rank = initial_ranks[n_threads - 1];
- for (long i = n_threads - 2; i >= 0; --i) {
- long stream_block_beg = right_block_beg + i * max_stream_block_size;
- long stream_block_end = std::min(stream_block_beg + max_stream_block_size, right_block_end);
- long stream_block_size = stream_block_end - stream_block_beg;
- long suf_start = stream_block_end;
-
- long left = initial_ranges[i].first;
- long right = initial_ranges[i].second;
-
- // Keep refining the range [left..right) until it's empty.
- while (left != right) {
- // Valid values for mid are in [left..right).
- long mid = (left + right) / 2;
-
- // Check if suffix starting at position suf_start is larger
- // than the one starting at block_beg + bwtsa[mid].sa in the text.
- // We know they have a common prefix of length prev_stream_block_size.
- if ((long)bwtsa[mid].sa + prev_stream_block_size >= left_block_size) {
- if (gt->get(text_length - 1 - (suf_start + left_block_size - (long)bwtsa[mid].sa - 1))) left = mid + 1;
- else right = mid;
- } else {
- long j = bwtsa[mid].sa + prev_stream_block_size;
- if (sp_isa->query(j) < prev_rank) left = mid + 1;
- else right = mid;
- }
- }
-
- initial_ranks[i] = left;
- prev_rank = left;
- prev_stream_block_size = stream_block_size;
- }
-
- delete sp_isa;
- fprintf(stderr, "%.3Lf ", utils::wclock() - start);
- } else {
- for (long j = 0; j + 1 < n_threads; ++j)
- initial_ranks[j] = initial_ranges[j].first;
- }
- fprintf(stderr, "\n");
-
- //----------------------------------------------------------------------------
- // STEP 4: allocate gap array. The gap array is indexed from 0 to
- // left_block_size so the number of elements is left_block_size + 1.
- //----------------------------------------------------------------------------
- start = utils::wclock();
- gap = new inmem_gap_array(left_block_size + 1);
-
- //----------------------------------------------------------------------------
- // STEP 5: allocate buffers, buffer polls and auxiliary arrays.
- //----------------------------------------------------------------------------
-
- // Allocate gap buffers.
- long n_gap_buffers = 2 * n_threads;
- gap_buffer<saidx_t> **gap_buffers = new gap_buffer<saidx_t>*[n_gap_buffers];
- for (long i = 0; i < n_gap_buffers; ++i)
- gap_buffers[i] = new gap_buffer<saidx_t>(gap_buf_size, max_threads);
-
- // Create poll of empty and full buffers.
- gap_buffer_poll<saidx_t> *empty_gap_buffers = new gap_buffer_poll<saidx_t>();
- gap_buffer_poll<saidx_t> *full_gap_buffers = new gap_buffer_poll<saidx_t>(n_threads);
-
- // Add empty buffers to empty poll.
- for (long i = 0; i < n_gap_buffers; ++i)
- empty_gap_buffers->add(gap_buffers[i]);
-
- // Allocate temp arrays and oracles.
- long max_buffer_elems = gap_buf_size / sizeof(saidx_t);
- saidx_t *temp = (saidx_t *)malloc(max_buffer_elems * n_threads * sizeof(saidx_t));
- int *oracle = (int *)malloc(max_buffer_elems * n_threads * sizeof(int));
- long double allocations_time = utils::wclock() - start;
- if (allocations_time > 0.05L)
- fprintf(stderr, " Allocations: %.2Lf\n", allocations_time);
-
- //----------------------------------------------------------------------------
- // STEP 6: run the parallel streaming.
- //----------------------------------------------------------------------------
-
- // Start streaming threads.
- fprintf(stderr, " Streaming: ");
- start = utils::wclock();
- threads = new std::thread*[n_threads];
- for (long t = 0; t < n_threads; ++t) {
- long beg = right_block_beg + t * max_stream_block_size;
- long end = std::min(beg + max_stream_block_size, right_block_end);
-
- threads[t] = new std::thread(inmem_parallel_stream<rank_type, saidx_t>,
- text, text_length, beg, end, last, count, full_gap_buffers,
- empty_gap_buffers, initial_ranks[t], i0, rank, gap->m_length, max_threads,
- gt, temp + t * max_buffer_elems, oracle + t * max_buffer_elems, need_gt);
- }
-
- // Start updating thread.
- std::thread *updater = new std::thread(inmem_gap_updater<saidx_t>,
- full_gap_buffers, empty_gap_buffers, gap, max_threads);
-
- // Wait to all threads to finish.
- for (long t = 0; t < n_threads; ++t) threads[t]->join();
- updater->join();
- streaming_time = utils::wclock() - start;
- long double streaming_speed =
- (right_block_size / (1024.L * 1024)) / streaming_time;
- fprintf(stderr, "%.2Lf (%.2LfMiB/s)\n", streaming_time,
- streaming_speed);
-
- //----------------------------------------------------------------------------
- // STEP 7: clean up and sort gap->m_excess.
- //----------------------------------------------------------------------------
- start = utils::wclock();
- free(oracle);
- free(temp);
- for (long i = 0; i < n_threads; ++i) delete threads[i];
- for (long i = 0; i < n_gap_buffers; ++i) delete gap_buffers[i];
- delete updater;
- delete[] threads;
- delete[] gap_buffers;
- delete empty_gap_buffers;
- delete full_gap_buffers;
- delete rank;
- delete[] count;
-
- std::sort(gap->m_excess.begin(), gap->m_excess.end());
-
- long double cleaning_time = utils::wclock() - start;
- if (cleaning_time > 0.1L)
- fprintf(stderr, " Cleaning: %.2Lf\n", cleaning_time);
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_GAP_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h
deleted file mode 100644
index 98a4ac1b..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h
+++ /dev/null
@@ -1,922 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_compute_initial_ranks.h
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_INITIAL_RANKS_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_INITIAL_RANKS_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-
-#include "../background_block_reader.h"
-#include "../multifile.h"
-#include "../multifile_bit_stream_reader.h"
-#include "bwtsa.h"
-#include "pagearray.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-// #define BLOCK_MATRIX_MODULE_DEBUG_MODE
-
-inline int lcp_compare(const unsigned char *text, long text_length,
- const unsigned char *pat, long pat_length, long gt_begin_length,
- long j, multifile_bit_stream_reader &rev_gt_begin_reader, long &lcp) {
- while (lcp < pat_length && j + lcp < text_length && pat[lcp] == text[j + lcp])
- ++lcp;
-
- if (j + lcp >= text_length) {
- if (rev_gt_begin_reader.access(gt_begin_length - (text_length - j))) return 1;
- else return -1;
- } else if (lcp == pat_length) return 0;
- else {
- if (pat[lcp] < text[j + lcp]) return -1;
- else return 1;
- }
-}
-
-inline int lcp_compare(const unsigned char *text, const unsigned char *pat,
- long pat_length, long j, long &lcp) {
- while (lcp < pat_length && pat[lcp] == text[j + lcp]) ++lcp;
- if (lcp == pat_length) return 0;
- else if (pat[lcp] < text[j + lcp]) return -1;
- else return 1;
-}
-
-//------------------------------------------------------------------------------
-// Find the range [left..right) of suffixes starting inside the block that are
-// prefixed with pat[0..pat_length). In case there is no such suffix, left ==
-// right and they both point to the first suffix larger than the pattern.
-//------------------------------------------------------------------------------
-template<typename pagearray_type>
-void compute_range(const unsigned char *text, long block_beg, long block_size,
- const unsigned char *pat, long pat_length, const pagearray_type &bwtsa,
- std::pair<long, long> &ret) {
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- long min_discrepancy = utils::random_long(0L, 10L);
- long balancing_factor = utils::random_long(1L, 10L);
-#else
- static const long min_discrepancy = (1L << 16);
- static const long balancing_factor = 64L;
-#endif
-
- // Find left.
- long low = -1L, high = block_size;
- long llcp = 0, rlcp = 0;
- while (low + 1 != high) {
- // Invariant: left is in the range (low..high].
- long lcp = std::min(llcp, rlcp);
-
- // Compute mid.
- // Valid values for mid are: low + 1, .., high - 1.
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- // Choose the pivot that split the range into two
- // parts of sizes with ratio equal to logd / d.
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else // Discrepancy is too small, use standard binary search.
- mid = (low + high) / 2;
-
- if (lcp_compare(text, pat, pat_length, block_beg + (long)bwtsa[mid].sa, lcp) <= 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- long left = high;
-
- // Find right.
- if (rlcp == pat_length) {
- high = block_size;
- rlcp = 0;
-
- while (low + 1 != high) {
- // Invariant: right is in the range (low..high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare(text, pat, pat_length, block_beg + (long)bwtsa[mid].sa, lcp) < 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- }
- long right = high;
-
- ret = std::make_pair(left, right);
-}
-
-//------------------------------------------------------------------------------
-// On the entry to the function:
-// - all suffixes in the range [0..left) are smaller than pat[0..old_pat_length),
-// - all suffixes in the range [right..text_length) are larger than the pattern,
-// - suffixes in the range [left..right) are unknown -- they can either be
-// larger or smaller than the pattern, or equal -- in any case, they have a
-// common prefix of length `old_pat_length' with the pattern.
-//------------------------------------------------------------------------------
-template<typename saidx_t>
-void refine_range(const unsigned char *text, long block_beg,
- const bwtsa_t<saidx_t> *block_psa, long left, long right,
- long old_pat_length, long pat_length, const unsigned char *pat,
- long &newleft, long &newright) {
- long low = left - 1;
- long high = right;
- long llcp = old_pat_length;
- long rlcp = old_pat_length;
-
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- long min_discrepancy = utils::random_long(0L, 10L);
- long balancing_factor = utils::random_long(1L, 10L);
-#else
- static const long min_discrepancy = (1L << 16);
- static const long balancing_factor = 64L;
-#endif
-
- while (low + 1 != high) {
- // Invariant: newleft is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare(text, pat, pat_length, block_beg + block_psa[mid].sa, lcp) <= 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- newleft = high;
-
- if (rlcp >= pat_length) {
- high = right;
- rlcp = old_pat_length;
-
- while (low + 1 != high) {
- // Invariant: newright is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare(text, pat, pat_length, block_beg + block_psa[mid].sa, lcp) < 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- }
- newright = high;
-}
-
-template<typename saidx_t>
-void refine_range(const unsigned char *text, long text_length,
- long tail_gt_begin_reversed_length, long block_beg,
- const bwtsa_t<saidx_t> *block_psa, long left, long right,
- const multifile *tail_gt_begin_reversed,
- long old_pat_length, long pat_length,
- const unsigned char *pat, long &newleft, long &newright) {
- multifile_bit_stream_reader reader(tail_gt_begin_reversed);
-
- long low = left - 1;
- long high = right;
- long llcp = old_pat_length;
- long rlcp = old_pat_length;
-
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- long min_discrepancy = utils::random_long(0L, 10L);
- long balancing_factor = utils::random_long(1L, 10L);
-#else
- static const long min_discrepancy = (1L << 16);
- static const long balancing_factor = 64L;
-#endif
-
- while (low + 1 != high) {
- // Invariant: newleft is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare(text, text_length, pat, pat_length, tail_gt_begin_reversed_length,
- block_beg + block_psa[mid].sa, reader, lcp) <= 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- newleft = high;
-
- if (rlcp >= pat_length) {
- high = right;
- rlcp = old_pat_length;
-
- while (low + 1 != high) {
- // Invariant: newright is in the range (low, high].
- long lcp = std::min(llcp, rlcp);
- long mid = 0L;
- if (llcp + min_discrepancy < rlcp) {
- long d = rlcp - llcp;
- long logd = utils::log2ceil(d);
- mid = low + 1 + ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else if (rlcp + min_discrepancy < llcp) {
- long d = llcp - rlcp;
- long logd = utils::log2ceil(d);
- mid = high - 1 - ((high - low - 1) * balancing_factor * logd) / (d + balancing_factor * logd);
- } else mid = (low + high) / 2;
-
- if (lcp_compare(text, text_length, pat, pat_length, tail_gt_begin_reversed_length,
- block_beg + block_psa[mid].sa, reader, lcp) < 0) {
- high = mid;
- rlcp = lcp;
- } else {
- low = mid;
- llcp = lcp;
- }
- }
- }
- newright = high;
-}
-
-//==============================================================================
-// Variant 1: compute ranges for columns other than the last two.
-//==============================================================================
-template<typename saidx_t>
-void compute_ranges_1(const unsigned char *text, long text_length,
- const bwtsa_t<saidx_t> *bwtsa, long max_block_size,
- std::pair<long, long> **primary_range,
- std::pair<long, long> **secondary_range,
- long row, long column) {
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
- long block_end = text_length - (n_blocks - 1 - row) * max_block_size;
- long block_begin = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_begin;
- long pat_start = text_length - (n_blocks - 1 - column) * max_block_size;
-
- const unsigned char *pat = text + pat_start;
- const bwtsa_t<saidx_t> *block_psa = bwtsa + block_begin;
-
- // Check that 0 <= row < column < n_blocks - 2 and
- // pat_start + 2 * max_block_size <= text_length.
- if (0 > row || row >= column || column >= n_blocks - 2 ||
- pat_start + 2L * max_block_size > text_length) {
- fprintf(stdout, "\nError: invariant in compute_ranges_1 failed.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
-
- long left = 0L;
- long right = block_size;
- long cur_pat_length = 0L;
-
- // Compute the primary range.
- {
- long new_pat_length = max_block_size;
- if (left != right && cur_pat_length < new_pat_length) {
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, block_begin, block_psa, left, right,
- cur_pat_length, new_pat_length, pat, newleft, newright);
- left = newleft;
- right = newright;
- }
- cur_pat_length = new_pat_length;
- }
- primary_range[row][column] = std::make_pair(left, right);
-
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- // Verify the primary range.
- {
- long smaller = 0L;
- long equal = 0L;
- for (long j = block_begin; j < block_end; ++j) {
- long lcp = 0L;
- while (lcp < max_block_size && text[j + lcp] == pat[lcp]) ++lcp;
- if (lcp == max_block_size) ++equal;
- else if (text[j + lcp] < pat[lcp]) ++smaller;
- }
- long check_left = smaller;
- long check_right = smaller + equal;
- if (primary_range[row][column] != std::make_pair(check_left, check_right)) {
- fprintf(stdout, "\nError: incorrect primary range!\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
- }
-#endif
-
- // Compute secondary range.
- {
- long new_pat_length = cur_pat_length + max_block_size;
- if (left != right && cur_pat_length < new_pat_length) {
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, block_begin, block_psa, left, right,
- cur_pat_length, new_pat_length, pat, newleft, newright);
- left = newleft;
- right = newright;
- }
- cur_pat_length = new_pat_length;
- }
- secondary_range[row][column] = std::make_pair(left, right);
-
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- // Verify the secondary range.
- {
- long smaller = 0L;
- long equal = 0L;
- for (long j = block_begin; j < block_end; ++j) {
- long lcp = 0L;
- while (lcp < cur_pat_length && text[j + lcp] == pat[lcp]) ++lcp;
- if (lcp == cur_pat_length) ++equal;
- else if (text[j + lcp] < pat[lcp]) ++smaller;
- }
- long check_left = smaller;
- long check_right = smaller + equal;
- if (secondary_range[row][column] != std::make_pair(check_left, check_right)) {
- fprintf(stdout, "\nError: incorrect secondary range!\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
- }
-#endif
-}
-
-//==============================================================================
-// Variant 2: compute primary and secondary range for second to last column.
-//==============================================================================
-template<typename saidx_t>
-void compute_ranges_2(const unsigned char *text, long text_length,
- long text_beg, long supertext_length, const bwtsa_t<saidx_t> *bwtsa,
- long max_block_size, background_block_reader *reader,
- const unsigned char *next_block,
- std::pair<long, long> **primary_range,
- std::pair<long, long> **secondary_range,
- long row, long column) {
- long text_end = text_beg + text_length;
- long tail_length = supertext_length - text_end;
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
- long block_end = text_length - (n_blocks - 1 - row) * max_block_size;
- long block_begin = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_begin;
- long pat_start = text_length - (n_blocks - 1 - column) * max_block_size;
-
- const unsigned char *pat = text + pat_start;
- const bwtsa_t<saidx_t> *block_psa = bwtsa + block_begin;
-
- // Check that 0 <= row < column and column == n_blocks - 2
- // and pat_start + max_block_size == text_length.
- if (0 > row || row >= column || column != n_blocks - 2 ||
- pat_start + max_block_size != text_length) {
- fprintf(stdout, "\nError: invariant in compute_ranges_2 failed.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
-
- long left = 0L;
- long right = block_size;
- long cur_pat_length = 0L;
-
- // Compute primary range.
- {
- long new_pat_length = max_block_size;
- if (left != right && cur_pat_length < new_pat_length) {
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, block_begin, block_psa, left, right,
- cur_pat_length, new_pat_length, pat, newleft, newright);
- left = newleft;
- right = newright;
- }
- cur_pat_length = new_pat_length;
- }
- primary_range[row][column] = std::make_pair(left, right);
-
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- // Verify the primary range.
- {
- long smaller = 0L;
- long equal = 0L;
- for (long j = block_begin; j < block_end; ++j) {
- long lcp = 0L;
- while (lcp < cur_pat_length && text[j + lcp] == pat[lcp]) ++lcp;
- if (lcp == cur_pat_length) ++equal;
- else if (text[j + lcp] < pat[lcp]) ++smaller;
- }
- long check_left = smaller;
- long check_right = smaller + equal;
- if (primary_range[row][column] != std::make_pair(check_left, check_right)) {
- fprintf(stdout, "\nError: incorrect primary range!\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
- }
-#endif
-
- static const long chunk_size = (1L << 20);
-
- // Compute secondary range.
- long pat_length = cur_pat_length + std::min(tail_length, max_block_size);
- if (reader) {
- // The reader != NULL, meaning that we have to gradually refine the range.
- while (left != right && cur_pat_length < pat_length) {
- long next_chunk = std::min(chunk_size, pat_length - cur_pat_length);
- long new_pat_length = cur_pat_length + next_chunk;
- reader->wait(new_pat_length - max_block_size);
-
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, block_begin, block_psa, left, right, cur_pat_length,
- new_pat_length, reader->m_data - max_block_size, newleft, newright);
- left = newleft;
- right = newright;
- cur_pat_length = new_pat_length;
- }
- } else {
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- // This version extends the range chunk by chunk (using random chunk
- // lengths) even if the whole next block is available. This is for
- // debugging purpose.
- while (left != right && cur_pat_length < pat_length) {
- long next_chunk = utils::random_long(1L, pat_length - cur_pat_length);
- long new_pat_length = cur_pat_length + next_chunk;
-
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, block_begin, block_psa, left, right, cur_pat_length,
- new_pat_length, next_block - max_block_size, newleft, newright);
- left = newleft;
- right = newright;
- cur_pat_length = new_pat_length;
- }
-#else
- // The whole next block is available, we can just do one binary search.
- long new_pat_length = pat_length;
- if (left != right && cur_pat_length < new_pat_length) {
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, block_begin, block_psa, left, right, cur_pat_length,
- new_pat_length, next_block - max_block_size, newleft, newright);
- left = newleft;
- right = newright;
- }
- cur_pat_length = new_pat_length;
-#endif
- }
- secondary_range[row][column] = std::make_pair(left, right);
-}
-
-//==============================================================================
-// Variant 3: compute primary and secondary range for the last column.
-//==============================================================================
-template<typename saidx_t>
-void compute_ranges_3(const unsigned char *text, long text_length,
- long text_beg, long supertext_length, const bwtsa_t<saidx_t> *bwtsa,
- long max_block_size, const multifile *tail_gt_begin_reversed,
- background_block_reader *reader, const unsigned char *next_block,
- std::pair<long, long> **primary_range,
- std::pair<long, long> **secondary_range,
- long row, long column) {
- long text_end = text_beg + text_length;
- long tail_length = supertext_length - text_end;
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
- long block_end = text_length - (n_blocks - 1 - row) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
- const bwtsa_t<saidx_t> *block_psa = bwtsa + block_beg;
- long first_range_pat_length = std::min(max_block_size, tail_length);
-
- // length of text stored in next_block (if not NULL)
- long pat_length = std::min(text_length, tail_length);
-
- // Note: max_block_size <= text_length thus
- // first_range_pat_length <= pat_length
-
- // Invariant: one of the following cases hold:
- // (1) next_block != NULL and reader == NULL and next_block stores
- // std::min(text_length, tail_length) symbols after text
- // (2) next_block == NULL and reader != NULL and reader will read
- // std::min(text_length, tail_length) symbols after text
-
- // Check that 0 <= row < colum and column == n_blocks - 1.
- if (0 > row || row >= column || column != n_blocks - 1) {
- fprintf(stdout, "\nError: invariant 1 in compute_ranges_3 failed.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
-
- long left = 0L;
- long right = block_size;
- long cur_pat_length = 0L;
-
- static const long chunk_size = (1L << 20);
-
- // Compute the primary range.
- if (reader) {
- // The reader != NULL, meaning that we have to gradually refine the range.
- while (left != right && cur_pat_length < first_range_pat_length) {
- long next_chunk = std::min(chunk_size,
- first_range_pat_length - cur_pat_length);
- long new_pat_length = cur_pat_length + next_chunk;
- reader->wait(new_pat_length);
-
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, text_length, tail_length, block_beg, block_psa,
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length,
- reader->m_data, newleft, newright);
- left = newleft;
- right = newright;
- cur_pat_length = new_pat_length;
- }
- } else {
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- // This version extends the range chunk by chunk (using random chunk
- // lengths) even if the whole next block is available. This is for
- // debugging purpose.
- while (left != right && cur_pat_length < first_range_pat_length) {
- long next_chunk = utils::random_long(1L,
- first_range_pat_length - cur_pat_length);
- long new_pat_length = cur_pat_length + next_chunk;
-
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, text_length, tail_length, block_beg, block_psa,
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length,
- next_block, newleft, newright);
- left = newleft;
- right = newright;
- cur_pat_length = new_pat_length;
- }
-#else
- // The whole next block is available, we can just do one binary search.
- long new_pat_length = first_range_pat_length;
- if (left != right && cur_pat_length < new_pat_length) {
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, text_length, tail_length, block_beg, block_psa,
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length,
- next_block, newleft, newright);
- left = newleft;
- right = newright;
- }
- cur_pat_length = new_pat_length;
-#endif
- }
- primary_range[row][column] = std::make_pair(left, right);
-
- // Compute the secondary range.
- if (reader) {
- // The reader != NULL, meaning that we have to gradually refine the range.
- while (left != right && cur_pat_length < pat_length) {
- long next_chunk = std::min(chunk_size, pat_length - cur_pat_length);
- long new_pat_length = cur_pat_length + next_chunk;
- reader->wait(new_pat_length);
-
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, text_length, tail_length, block_beg, block_psa,
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length,
- reader->m_data, newleft, newright);
- left = newleft;
- right = newright;
- cur_pat_length = new_pat_length;
- }
- } else {
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- // This version extends the range chunk by chunk (using random chunk
- // lengths) even if the whole next block is available. This is for
- // debugging purpose.
- while (left != right && cur_pat_length < pat_length) {
- long next_chunk = utils::random_long(1L, pat_length - cur_pat_length);
- long new_pat_length = cur_pat_length + next_chunk;
-
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, text_length, tail_length, block_beg, block_psa,
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length,
- next_block, newleft, newright);
- left = newleft;
- right = newright;
- cur_pat_length = new_pat_length;
- }
-#else
- // The whole next block is available, we can just do one binary search.
- long new_pat_length = pat_length;
- if (left != right && cur_pat_length < new_pat_length) {
- long newleft = 0L;
- long newright = 0L;
- refine_range(text, text_length, tail_length, block_beg, block_psa,
- left, right, tail_gt_begin_reversed, cur_pat_length, new_pat_length,
- next_block, newleft, newright);
- left = newleft;
- right = newright;
- }
- cur_pat_length = new_pat_length;
-#endif
- }
- secondary_range[row][column] = std::make_pair(left, right);
-
- if (left != right && text_length <= tail_length) {
- fprintf(stdout, "\nError: left != right && text_length <= tail_length.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
-}
-
-template<typename saidx_t>
-void task_solver_code(const unsigned char *text,
- long text_length, const bwtsa_t<saidx_t> *bwtsa,
- long max_block_size,
- std::pair<long, long> **primary_range,
- std::pair<long, long> **secondary_range,
- std::vector<std::pair<long, long> > &tasks,
- std::mutex &tasks_mutex) {
- while (true) {
- // Get a task from the task collection.
- std::pair<long, long> task;
- bool task_avail = true;
- std::unique_lock<std::mutex> lk(tasks_mutex);
- if (tasks.empty()) task_avail = false;
- else {
- task = tasks.back();
- tasks.pop_back();
- }
- lk.unlock();
-
- if (!task_avail) break;
-
- // Solve the task and save the answer.
- compute_ranges_1(text, text_length, bwtsa, max_block_size,
- primary_range, secondary_range, task.first, task.second);
- }
-}
-
-template<typename saidx_t>
-void compute_block_rank_matrix(const unsigned char *text, long text_length,
- const bwtsa_t<saidx_t> *bwtsa, long max_block_size, long text_beg,
- long supertext_length, std::string,
- const multifile *tail_gt_begin_reversed, background_block_reader *reader,
- const unsigned char *next_block, long **block_rank_matrix) {
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
- long text_end = text_beg + text_length;
- long tail_length = supertext_length - text_end;
-
- // Allocate primary and secondary ranges.
- std::pair<long, long> **primary_range = new std::pair<long, long>*[n_blocks];
- std::pair<long, long> **secondary_range = new std::pair<long, long>*[n_blocks];
- for (long row = 0; row < n_blocks; ++row) {
- primary_range[row] = new std::pair<long, long>[n_blocks];
- secondary_range[row] = new std::pair<long, long>[n_blocks];
- }
-
- //----------------------------------------------------------------------------
- // STEP 1: Start the threads computing ranges for the last column
- //----------------------------------------------------------------------------
- std::thread **threads_last_col = NULL;
- if (n_blocks > 1) {
- threads_last_col = new std::thread*[n_blocks - 1];
- for (long row = 0; row + 1 < n_blocks; ++row) {
- long column = n_blocks - 1;
- threads_last_col[row] = new std::thread(compute_ranges_3<saidx_t>, text,
- text_length, text_beg, supertext_length, bwtsa, max_block_size,
- tail_gt_begin_reversed, reader, next_block, primary_range,
- secondary_range, row, column);
- }
- }
-
- //----------------------------------------------------------------------------
- // STEP 2: Start the threads computing ranges for the second-to-last column.
- //----------------------------------------------------------------------------
- std::thread **threads_second_last_col = NULL;
- if (n_blocks > 2) {
- threads_second_last_col = new std::thread*[n_blocks - 2];
- for (long row = 0; row + 2 < n_blocks; ++row) {
- long column = n_blocks - 2;
- threads_second_last_col[row] = new std::thread(compute_ranges_2<saidx_t>,
- text, text_length, text_beg, supertext_length, bwtsa, max_block_size,
- reader, next_block, primary_range, secondary_range, row, column);
- }
- }
-
- //----------------------------------------------------------------------------
- // STEP 3: Start threads computing columns other than the last two.
- //----------------------------------------------------------------------------
- std::vector<std::pair<long, long> > tasks;
- std::mutex tasks_mutex;
- for (long row = 0; row < n_blocks; ++row)
- for (long col = row + 1; col + 2 < n_blocks; ++col)
- tasks.push_back(std::make_pair(row, col));
- std::random_shuffle(tasks.begin(), tasks.end()); // solve in any order
- std::thread **threads_other = new std::thread*[n_blocks];
- for (long t = 0; t < n_blocks; ++t)
- threads_other[t] = new std::thread(task_solver_code<saidx_t>, text,
- text_length, bwtsa, max_block_size, primary_range, secondary_range,
- std::ref(tasks), std::ref(tasks_mutex));
-
- //----------------------------------------------------------------------------
- // STEP 4: Wait for all threads to finish.
- //----------------------------------------------------------------------------
-
- // 4.1
- //
- // Wait for the threads computing columns other than last two.
- for (long t = 0; t < n_blocks; ++t) threads_other[t]->join();
- for (long t = 0; t < n_blocks; ++t) delete threads_other[t];
- delete[] threads_other;
-
- // 4.2
- //
- // Wait for the threads computing second-to-last column to finish.
- if (n_blocks > 2) {
- for (long row = 0; row + 2 < n_blocks; ++row)
- threads_second_last_col[row]->join();
- for (long row = 0; row + 2 < n_blocks; ++row)
- delete threads_second_last_col[row];
- delete[] threads_second_last_col;
- }
-
- // 4.3
- //
- // Wait for the threads computing the last column to finish.
- if (n_blocks > 1) {
- for (long row = 0; row + 1 < n_blocks; ++row) threads_last_col[row]->join();
- for (long row = 0; row + 1 < n_blocks; ++row) delete threads_last_col[row];
- delete[] threads_last_col;
- }
-
- //----------------------------------------------------------------------------
- // STEP 5: Compute the rank values from primary and secondary ranges.
- //----------------------------------------------------------------------------
- for (long row = n_blocks - 1; row >= 0; --row) {
- for (long col = n_blocks - 1; col > row; --col) {
- long left = secondary_range[row][col].first;
- long right = secondary_range[row][col].second;
-
- if (col != n_blocks - 1 &&
- (col != n_blocks - 2 || tail_length >= max_block_size)) {
- long cur_block_end = text_length - (n_blocks - 1 - row) * max_block_size;
- long cur_block_beg = std::max(0L, cur_block_end - max_block_size);
- long cur_block_size = cur_block_end - cur_block_beg;
- long shift = max_block_size - cur_block_size;
- long next_block_end = text_length - (n_blocks - 1 - (row + 1)) * max_block_size;
- long next_block_beg = std::max(0L, next_block_end - max_block_size);
-
- const bwtsa_t<saidx_t> *cur_block_psa = bwtsa + cur_block_beg;
- const bwtsa_t<saidx_t> *next_block_psa = bwtsa + next_block_beg;
-
- // Compute the ranges.
- long next_primary_range_beg = primary_range[row + 1][col + 1].first;
- long next_primary_range_end = primary_range[row + 1][col + 1].second;
- long next_primary_range_size = next_primary_range_end -
- next_primary_range_beg;
-
- // Compute the difference of the arithmetic progression.
- long delta = 0L;
- long next_psa_first = 0L;
- long next_psa_second = 0L;
- if (next_primary_range_size > 1) {
- next_psa_first = next_block_psa[next_primary_range_beg].sa;
- next_psa_second = next_block_psa[next_primary_range_beg + 1].sa;
- delta = next_psa_second - next_psa_first;
- }
-
- // Invariant:
- // 1. the primary range of next block contains (possibly
- // zero) values forming an arithmetic progression,
- // 2. elements in the range [left..right) of the psa of the
- // current block incremented by `shift' appear in the primary
- // range of the next block.
-
-#ifdef BLOCK_MATRIX_MODULE_DEBUG_MODE
- // Check that both invariants hold.
- for (long j = next_primary_range_end; j + 1 < next_primary_range_end; ++j)
- if ((long)next_block_psa[j + 1].sa - (long)next_block_psa[j].sa != delta) {
- fprintf(stdout, "Invariant 1 failed.\n"); std::exit(EXIT_FAILURE); }
- for (long j = left; j < right; ++j) {
- long suf = cur_block_psa[j].sa + shift;
- bool found = false;
- for (long jj = next_primary_range_beg; jj < next_primary_range_end; ++jj)
- if ((long)next_block_psa[jj].sa == suf) { found = true; break; }
- if (!found) {
- fprintf(stdout, "Invariant 2 failed.\n");
- std::fflush(stdout);
- std::exit(EXIT_FAILURE);
- }
- }
-#endif
-
- // Keep refining the range [left..right) until it's empty.
- while (left != right) {
- // Valid values for mid are in [left..right).
- long mid = (left + right) / 2;
- long suf = (long)cur_block_psa[mid].sa + shift;
-
- // Locate suf in next_block_psa using invariants 1. and 2.
- long pos = next_primary_range_beg;
- if (next_primary_range_size > 1)
- pos += (suf - next_psa_first) / delta;
-
- // Refine the range.
- if (pos < block_rank_matrix[row + 1][col + 1]) left = mid + 1;
- else right = mid;
- }
- }
-
- block_rank_matrix[row][col] = left;
- }
- }
-
- // Clean up.
- for (long row = 0; row < n_blocks; ++row) {
- delete[] primary_range[row];
- delete[] secondary_range[row];
- }
- delete[] primary_range;
- delete[] secondary_range;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_COMPUTE_INITIAL_RANKS_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_gap_array.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_gap_array.h
deleted file mode 100644
index 7b0b0381..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_gap_array.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_gap_array.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_SASCAN_INMEM_GAP_ARRAY_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_SASCAN_INMEM_GAP_ARRAY_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <vector>
-#include <algorithm>
-#include <mutex>
-#include <stack>
-#include <thread>
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-struct inmem_gap_array {
- unsigned char *m_count;
- long m_length;
-
- std::vector<long> m_excess;
- std::mutex m_excess_mutex;
-
- inmem_gap_array(long length)
- : m_length(length) {
- m_count = (unsigned char *)calloc(m_length, sizeof(unsigned char));
- }
-
- ~inmem_gap_array() {
- free(m_count);
- }
-
- //==============================================================================
- // Find and smallest j such that j + gap[0] + .. + gap[j] >= a. Store
- // the value of j into b and gap[0] + .. + gap[j] into c. To speed up the
- // algorithm, we have array gapsum defined as
- //
- // gapsum[i] = gap[0] + .. + gap[i * block_size - 1].
- //
- //==============================================================================
- static void answer_single_gap_query(const inmem_gap_array *gap, long block_size,
- const long *gapsum, long a, long &b, long &c) {
- long n_blocks = (gap->m_length + block_size - 1) / block_size;
-
- // Find the block containing the correct index. To do that find the largest
- // j such that gapsum[j] + block_size * j - 1 < a and start searching from
- // j * block_size.
- long j = 0;
- while (j + 1 < n_blocks && gapsum[j + 1] + block_size * (j + 1) - 1 < a) ++j;
- // Invariant: the j we are searching for is > j * block_size - 1.
-
- long sum = gapsum[j];
- j = block_size * j;
- size_t excess_ptr = std::lower_bound(gap->m_excess.begin(),
- gap->m_excess.end(), j) - gap->m_excess.begin();
- while (true) {
- // Invariant: sum = gap[0] + .. + gap[j - 1].
- // Compute gap[j] using small gap array representation.
- long gap_j = gap->m_count[j];
- while (excess_ptr < gap->m_excess.size() && gap->m_excess[excess_ptr] == j) {
- gap_j += 256L;
- ++excess_ptr;
- }
-
- if (j + sum + gap_j >= a) { b = j; c = sum + gap_j; return; }
- else { sum += gap_j; ++j; }
- }
- }
-
- //==============================================================================
- // Compute gap[0] + gap[1] + .. + gap[j - 1] with the help of gapsum array.
- //==============================================================================
- static long compute_sum3(const inmem_gap_array *gap, long j,
- long max_block_size, long *gapsum) {
- long block_id = j / max_block_size;
- long result = gapsum[block_id];
-
- long scan_beg = block_id * max_block_size;
- long scan_end = j;
- long occ = std::upper_bound(gap->m_excess.begin(), gap->m_excess.end(), scan_end - 1)
- - std::lower_bound(gap->m_excess.begin(), gap->m_excess.end(), scan_beg);
- result += 256L * std::max(0L, occ);
- for (long i = block_id * max_block_size; i < j; ++i)
- result += gap->m_count[i];
-
- return result;
- }
-
- //==============================================================================
- // Compute sum of gap values for blocks in range [range_beg..range_end).
- // The sum for each block is stored in gapsum array.
- //==============================================================================
- static void compute_sum2(const inmem_gap_array *gap, long range_beg,
- long range_end, long max_block_size, long *gapsum) {
- for (long block_id = range_beg; block_id < range_end; ++block_id) {
- long block_beg = block_id * max_block_size;
- long block_end = std::min(block_beg + max_block_size, gap->m_length);
-
- // Process block.
- long occ = std::upper_bound(gap->m_excess.begin(), gap->m_excess.end(), block_end - 1)
- - std::lower_bound(gap->m_excess.begin(), gap->m_excess.end(), block_beg);
- long block_gap_sum = 256L * std::max(0L, occ);
- for (long j = block_beg; j < block_end; ++j)
- block_gap_sum += gap->m_count[j];
-
- gapsum[block_id] = block_gap_sum;
- }
- }
-
- //==============================================================================
- // Parallel computaton of answers to n_queries queries of the form:
- // What is the smallest j such that j + gap[0] + .. + gap[j] >= a[i]"
- // - the answer to i-th query is stored in b[i]
- // - in addition we also return gap[0] + .. + gap[j] in c[i]
- //
- // To do that we first split the gap array into blocks of size of about
- // length / max_threads and (in parallel) compute sums of gap values inside
- // these blocks. We the accumulate these sums into array of prefix sums.
- //
- // To answer each of the queries we start a separate thread. Each thread uses
- // the partial sums of gap array at block boundaries to find a good starting
- // point for search and then scans the gap array from there.
- //==============================================================================
- long answer_queries(long n_queries, const long *a, long *b, long *c,
- long max_threads, long i0) const {
- //----------------------------------------------------------------------------
- // STEP 1: split gap array into at most max_threads blocks
- // and in parallel compute sum of values inside each block.
- //----------------------------------------------------------------------------
- long max_block_size = std::min(4L << 20, (m_length + max_threads - 1) / max_threads);
- long n_blocks = (m_length + max_block_size - 1) / max_block_size;
- long *gapsum = new long[n_blocks];
-
- // Each thread handles range of blocks.
- long range_size = (n_blocks + max_threads - 1) / max_threads;
- long n_ranges = (n_blocks + range_size - 1) / range_size;
- std::thread **threads = new std::thread*[max_threads];
- for (long range_id = 0; range_id < n_ranges; ++range_id) {
- long range_beg = range_id * range_size;
- long range_end = std::min(range_beg + range_size, n_blocks);
-
- threads[range_id] = new std::thread(compute_sum2, this,
- range_beg, range_end, max_block_size, gapsum);
- }
- for (long i = 0; i < n_ranges; ++i) threads[i]->join();
- for (long i = 0; i < n_ranges; ++i) delete threads[i];
- delete[] threads;
-
- //----------------------------------------------------------------------------
- // STEP 2: compute partial sum from block counts.
- //----------------------------------------------------------------------------
- for (long i = 0, s = 0, t; i < n_blocks; ++i)
- { t = gapsum[i]; gapsum[i] = s; s += t; }
-
- //----------------------------------------------------------------------------
- // STEP 3: Answer the queries in parallel.
- //----------------------------------------------------------------------------
- threads = new std::thread*[n_queries];
- for (long i = 0; i < n_queries; ++i)
- threads[i] = new std::thread(answer_single_gap_query, this,
- max_block_size, gapsum, a[i], std::ref(b[i]), std::ref(c[i]));
- for (long i = 0; i < n_queries; ++i) threads[i]->join();
- for (long i = 0; i < n_queries; ++i) delete threads[i];
- delete[] threads;
-
- long result = -1;
- if (i0 != -1)
- result = compute_sum3(this, i0 + 1, max_block_size, gapsum);
-
- delete[] gapsum;
-
- return result;
- }
-};
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_GAP_ARRAY_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_psascan.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_psascan.h
deleted file mode 100644
index a71347c8..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_psascan.h
+++ /dev/null
@@ -1,309 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_psascan.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_PSASCAN_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_PSASCAN_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cmath>
-#include <vector>
-#include <limits>
-#include <algorithm>
-
-#include "../bitvector.h"
-#include "../multifile.h"
-#include "../background_block_reader.h"
-#include "inmem_gap_array.h"
-#include "compute_initial_gt_bitvectors.h"
-#include "initial_partial_sufsort.h"
-#include "change_gt_reference_point.h"
-#include "inmem_bwt_from_sa.h"
-#include "inmem_compute_initial_ranks.h"
-#include "parallel_merge.h"
-#include "inmem_bwtsa_merge.h"
-#include "pagearray.h"
-#include "bwtsa.h"
-#include "parallel_shrink.h"
-#include "merge_schedule.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename saidx_t, unsigned pagesize_log = 12>
-void inmem_psascan(
- unsigned char *text,
- long text_length,
- unsigned char *sa_bwt,
- long max_threads = 1,
- bool compute_bwt = false,
- bool compute_gt_begin = false,
- bitvector *gt_begin = NULL,
- long max_blocks = -1,
- long text_beg = 0,
- long text_end = 0,
- long supertext_length = 0,
- std::string supertext_filename = "",
- const multifile *tail_gt_begin_reversed = NULL,
- long *i0 = NULL,
- unsigned char *tail_prefix_preread = NULL) {
- static const unsigned pagesize = (1U << pagesize_log);
- long double absolute_start = utils::wclock();
- long double start;
-
- if ((long)std::numeric_limits<saidx_t>::max() < text_length) {
- fprintf(stderr, "Error: text is too long (%ld bytes),\n", text_length);
- fprintf(stderr, " std::numeric_limits<saidx_t>::max() = %ld\n",
- (long)std::numeric_limits<saidx_t>::max());
- std::exit(EXIT_FAILURE);
- }
-
- if (max_blocks == -1)
- max_blocks = max_threads;
-
- if (text_end == 0) {
- supertext_length = text_length;
- text_end = text_length;
- text_beg = 0;
- supertext_filename = "";
- tail_gt_begin_reversed = NULL;
- }
-
- bool has_tail = (text_end != supertext_length);
-
- if (!has_tail && tail_prefix_preread != NULL) {
- fprintf(stderr, "Error: has_tail == false but tail_prefix_preread != NULL\n");
- std::exit(EXIT_FAILURE);
- }
-
- long alignment_unit = (long)std::max(pagesize, 8U);
- long max_block_size = (text_length + max_blocks - 1) / max_blocks;
- while ((max_block_size & (alignment_unit - 1)) && max_block_size < text_length)
- ++max_block_size;
-
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
-
- if (!compute_gt_begin) {
- if (gt_begin) {
- fprintf(stderr, "Error: check gt_begin == NULL failed\n");
- std::exit(EXIT_FAILURE);
- }
- if (n_blocks > 1 || has_tail)
- gt_begin = new bitvector(text_length);
- } else {
- if (!gt_begin) {
- fprintf(stderr, "inmem_sascan: gt_begin was requested but is not allocated!\n");
- std::exit(EXIT_FAILURE);
- }
- }
-
- fprintf(stderr, "Text length = %ld (%.2LfMiB)\n", text_length, text_length / (1024.L * 1024));
- fprintf(stderr, "Max block size = %ld (%.2LfMiB)\n", max_block_size, max_block_size / (1024.L * 1024));
- fprintf(stderr, "Max blocks = %ld\n", max_blocks);
- fprintf(stderr, "Number of blocks = %ld\n", n_blocks);
- fprintf(stderr, "Max threads = %ld\n", max_threads);
- fprintf(stderr, "sizeof(saidx_t) = %lu\n", sizeof(saidx_t));
- fprintf(stderr, "Pagesize = %u\n", (1U << pagesize_log));
- fprintf(stderr, "Compute bwt = %s\n", compute_bwt ? "true" : "false");
- fprintf(stderr, "Compute gt begin = %s\n", compute_gt_begin ? "true" : "false");
- fprintf(stderr, "Text beg = %ld\n", text_beg);
- fprintf(stderr, "Text end = %ld\n", text_end);
- fprintf(stderr, "Supertext length = %ld (%.2LfMiB)\n", supertext_length, supertext_length / (1024.L * 1024));
- fprintf(stderr, "Supertext filename = %s\n", supertext_filename.c_str());
- fprintf(stderr, "Has tail = %s\n", has_tail ? "true" : "false");
- fprintf(stderr, "\n");
-
- bwtsa_t<saidx_t> *bwtsa = (bwtsa_t<saidx_t> *)sa_bwt;
-
- // Initialize reading of the tail prefix in the background.
- long tail_length = supertext_length - text_end;
- long tail_prefix_length = std::min(text_length, tail_length);
-
- background_block_reader *tail_prefix_background_reader = NULL;
- if (has_tail && tail_prefix_preread == NULL)
- tail_prefix_background_reader =
- new background_block_reader(supertext_filename, text_end, tail_prefix_length);
-
- //----------------------------------------------------------------------------
- // STEP 1: compute initial bitvectors, and partial suffix arrays.
- //----------------------------------------------------------------------------
- if (n_blocks > 1 || compute_gt_begin || has_tail) {
- fprintf(stderr, "Compute initial bitvectors:\n");
- start = utils::wclock();
- compute_initial_gt_bitvectors(text, text_length, gt_begin, max_block_size,
- max_threads, text_end, supertext_length, tail_gt_begin_reversed,
- tail_prefix_background_reader, tail_prefix_preread);
- fprintf(stderr, "Time: %.2Lf\n\n", utils::wclock() - start);
- }
-
- fprintf(stderr, "Initial sufsort:\n");
- start = utils::wclock();
- initial_partial_sufsort(text, text_length, gt_begin, bwtsa, max_block_size, max_threads, has_tail);
- fprintf(stderr, "Time: %.2Lf\n", utils::wclock() - start);
-
- //----------------------------------------------------------------------------
- // STEP 2: compute matrix of block ranks.
- //----------------------------------------------------------------------------
- fprintf(stderr, "Compute matrix of initial ranks: ");
- start = utils::wclock();
- long **block_rank_matrix = new long*[n_blocks];
- for (long j = 0; j < n_blocks; ++j)
- block_rank_matrix[j] = new long[n_blocks];
- compute_block_rank_matrix<saidx_t>(text, text_length, bwtsa,
- max_block_size, text_beg, supertext_length, supertext_filename,
- tail_gt_begin_reversed, tail_prefix_background_reader,
- tail_prefix_preread, block_rank_matrix);
-
- // Stop reading next block in the background or free memory taken by next block.
- if (has_tail) {
- if (tail_prefix_background_reader != NULL) {
- tail_prefix_background_reader->stop();
- delete tail_prefix_background_reader;
- } else free(tail_prefix_preread);
- }
-
- fprintf(stderr, "%.2Lf\n\n", utils::wclock() - start);
-
- //----------------------------------------------------------------------------
- // STEP 3: compute the gt bitvectors for blocks that will be on the right
- // side during the merging.
- //----------------------------------------------------------------------------
- if (n_blocks > 1 || compute_gt_begin) {
- fprintf(stderr, "Overwriting gt_end with gt_begin: ");
- start = utils::wclock();
- gt_end_to_gt_begin(text, text_length, gt_begin, max_block_size);
- fprintf(stderr, "%.2Lf\n\n", utils::wclock() - start);
- }
-
- float rl_ratio = 10.L; // estimated empirically
- long max_ram_usage_per_input_byte = 10L; // peak ram usage = 10n
- int max_left_size = std::max(1, (int)floor(n_blocks * (((long double)max_ram_usage_per_input_byte - (2.125L + sizeof(saidx_t))) / 5.L)));
- fprintf(stderr, "Assumed rl_ratio: %.2f\n", rl_ratio);
- fprintf(stderr, "Max left size = %d\n", max_left_size);
- fprintf(stderr, "Peak memory usage during last merging = %.3Lfn\n",
- (2.125L + sizeof(saidx_t)) + (5.L * max_left_size) / n_blocks);
- MergeSchedule schedule(n_blocks, rl_ratio, max_left_size);
-
- fprintf(stderr, "Skewed merge schedule:\n");
- print_schedule(schedule, n_blocks);
- fprintf(stderr, "\n");
-
- long *i0_array = new long[n_blocks];
- if (n_blocks > 1 || compute_bwt) {
- for (long block_id = 0; block_id < n_blocks; ++block_id) {
- long block_end = text_length - (n_blocks - 1 - block_id) * max_block_size;
- long block_beg = std::max(0L, block_end - max_block_size);
- long block_size = block_end - block_beg;
-
- if (block_id + 1 != n_blocks || compute_bwt) {
- fprintf(stderr, "Computing BWT for block %ld: ", block_id + 1);
- long double bwt_start = utils::wclock();
- compute_bwt_in_bwtsa<saidx_t>(text + block_beg, block_size,
- bwtsa + block_beg, max_threads, i0_array[block_id]);
- fprintf(stderr, "%.2Lf\n", utils::wclock() - bwt_start);
- }
- }
- fprintf(stderr, "\n");
- }
-
- if (n_blocks > 1) {
- long i0_result;
- pagearray<bwtsa_t<saidx_t>, pagesize_log> *result =
- inmem_bwtsa_merge<saidx_t, pagesize_log>(text, text_length, bwtsa,
- gt_begin, max_block_size, 0, n_blocks, max_threads, compute_gt_begin,
- compute_bwt, i0_result, schedule, text_beg, text_end,
- supertext_length, supertext_filename, tail_gt_begin_reversed,
- i0_array, block_rank_matrix);
- if (i0) *i0 = i0_result;
-
- // Permute SA to plain array.
- fprintf(stderr, "\nPermuting the resulting SA to plain array: ");
- start = utils::wclock();
- result->permute_to_plain_array(max_threads);
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- delete result;
- } else if (compute_bwt) {
- if (i0) *i0 = i0_array[0];
- }
- delete[] i0_array;
- for (long j = 0; j < n_blocks; ++j)
- delete[] block_rank_matrix[j];
- delete[] block_rank_matrix;
-
- if (!compute_gt_begin && (n_blocks > 1 || has_tail)) {
- delete gt_begin;
- gt_begin = NULL;
- }
-
- unsigned char *bwt = NULL;
- if (compute_bwt) {
- // Allocate aux, copy bwt into aux.
- fprintf(stderr, "Copying bwtsa.bwt into aux memory: ");
- start = utils::wclock();
- bwt = (unsigned char *)malloc(text_length);
- parallel_copy<bwtsa_t<saidx_t>, unsigned char>(bwtsa, bwt, text_length, max_threads);
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
- }
-
- fprintf(stderr, "Shrinking bwtsa.sa into sa: ");
- start = utils::wclock();
-
- parallel_shrink<bwtsa_t<saidx_t>, saidx_t>(bwtsa, text_length, max_threads);
-
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
-
- if (compute_bwt) {
- // Copy from aux into the end of bwtsa.
- fprintf(stderr, "Copying bwt from aux memory to the end of bwtsa: ");
- start = utils::wclock();
- unsigned char *dest = (unsigned char *)(((saidx_t *)bwtsa) + text_length);
- parallel_copy<unsigned char, unsigned char>(bwt, dest, text_length, max_threads);
- free(bwt);
- fprintf(stderr, "%.2Lf\n", utils::wclock() - start);
- }
-
- long double total_sascan_time = utils::wclock() - absolute_start;
- fprintf(stderr, "\nTotal time:\n");
- fprintf(stderr, "\tabsolute: %.2Lf\n", total_sascan_time);
- fprintf(stderr, "\trelative: %.4Lfs/MiB\n", total_sascan_time / ((long double)text_length / (1 << 20)));
- fprintf(stderr, "Speed: %.2LfMiB/s\n", ((long double)text_length / (1 << 20)) / total_sascan_time);
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_PSASCAN_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_stream.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_stream.h
deleted file mode 100644
index bc3f4f1c..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_stream.h
+++ /dev/null
@@ -1,276 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_stream.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_STREAM_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_STREAM_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <iostream>
-#include <queue>
-#include <string>
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "../bitvector.h"
-#include "../gap_buffer.h"
-#include "../utils.h"
-#include "rank.h"
-#include "inmem_update.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-//==============================================================================
-// The main streaming function.
-//
-// Note:
-// * it reads and writes bits in range
-// [stream_block_beg..stream_block_end) from gt bitvector right to left.
-//==============================================================================
-template<typename rank_type, typename block_offset_type>
-void inmem_parallel_stream(
- const unsigned char *text,
- long text_length,
- long stream_block_beg,
- long stream_block_end,
- unsigned char last,
- const long *count,
- gap_buffer_poll<block_offset_type> *full_gap_buffers,
- gap_buffer_poll<block_offset_type> *empty_gap_buffers,
- block_offset_type i,
- block_offset_type i0,
- const rank_type *rank,
- long gap_range_size,
- long n_increasers,
- bitvector *gt,
- block_offset_type *temp,
- int *oracle,
- bool need_gt) {
-
- //----------------------------------------------------------------------------
- // STEP 1: initialize structures necessary to do the buffer partitions.
- //----------------------------------------------------------------------------
- static const int max_buckets = 4096;
- int *block_id_to_sblock_id = new int[max_buckets];
-
- long bucket_size = 1;
- long bucket_size_bits = 0;
- while ((gap_range_size + bucket_size - 1) / bucket_size > max_buckets)
- bucket_size <<= 1, ++bucket_size_bits;
- long n_buckets = (gap_range_size + bucket_size - 1) / bucket_size;
- int *block_count = new int[n_buckets];
-
- static const long buffer_sample_size = 512;
- std::vector<block_offset_type> samples(buffer_sample_size);
- long *ptr = new long[n_increasers];
- block_offset_type *bucket_lbound = new block_offset_type[n_increasers + 1];
-
- //----------------------------------------------------------------------------
- // STEP 2: perform the actual streaming.
- //----------------------------------------------------------------------------
- long j = stream_block_end;
- bool gt_bit = gt->get(text_length - j);
- while (j > stream_block_beg) {
- // 2.a
- //
- // Get a buffer from the poll of empty buffers.
- std::unique_lock<std::mutex> lk(empty_gap_buffers->m_mutex);
- while (!empty_gap_buffers->available()) empty_gap_buffers->m_cv.wait(lk);
- gap_buffer<block_offset_type> *b = empty_gap_buffers->get();
- lk.unlock();
- empty_gap_buffers->m_cv.notify_one();
-
- // 2.b
- //
- // Process buffer, i.e., fill with gap values.
- long left = j - stream_block_beg;
- b->m_filled = std::min(left, b->m_size);
- std::fill(block_count, block_count + n_buckets, 0);
-
- if (need_gt) {
- for (long t = 0; t < b->m_filled; ++t) {
- bool new_gt_bit = (i > i0);
- if (new_gt_bit) gt->set(text_length - j);
- else gt->reset(text_length - j);
-
- unsigned char c = text[j - 1];
-
- // Compute new i.
- int delta = (new_gt_bit && c == 0);
- i = (block_offset_type)(count[c] + rank->rank(i, c) - delta);
- if (c == last && gt_bit) ++i;
-
- temp[t] = i;
- block_count[i >> bucket_size_bits]++;
-
- --j;
- gt_bit = gt->get(text_length - j);
- }
- } else {
- for (long t = 0; t < b->m_filled; ++t) {
- bool new_gt_bit = (i > i0);
-
- unsigned char c = text[j - 1];
-
- // Compute new i.
- int delta = (new_gt_bit && c == 0);
- i = (block_offset_type)(count[c] + rank->rank(i, c) - delta);
- if (c == last && gt_bit) ++i;
-
- temp[t] = i;
- block_count[i >> bucket_size_bits]++;
-
- --j;
- gt_bit = gt->get(text_length - j);
- }
-
- }
-
- // 2.c
- //
- // Partition the buffer into equal n_increasers parts.
-
- // Compute super-buckets.
- long ideal_sblock_size = (b->m_filled + n_increasers - 1) / n_increasers;
- long max_sbucket_size = 0;
- long bucket_id_beg = 0;
- for (long t = 0; t < n_increasers; ++t) {
- long bucket_id_end = bucket_id_beg, size = 0L;
- while (bucket_id_end < n_buckets && size < ideal_sblock_size)
- size += block_count[bucket_id_end++];
- b->sblock_size[t] = size;
- max_sbucket_size = std::min(max_sbucket_size, size);
- for (long id = bucket_id_beg; id < bucket_id_end; ++id)
- block_id_to_sblock_id[id] = t;
- bucket_id_beg = bucket_id_end;
- }
-
- if (max_sbucket_size < 4L * ideal_sblock_size) {
- // The quick partition was good enough.
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++])
- b->sblock_beg[t] = ptr[t] = curbeg;
-
- // Permute the elements of the buffer.
- for (long t = 0; t < b->m_filled; ++t) {
- long id = (temp[t] >> bucket_size_bits);
- long sblock_id = block_id_to_sblock_id[id];
- oracle[t] = ptr[sblock_id]++;
- }
-
- for (long t = 0; t < b->m_filled; ++t) {
- long addr = oracle[t];
- b->m_content[addr] = temp[t];
- }
- } else {
- // Repeat the partition into sbuckets, this time using random sample.
- // This is a fallback mechanism in case the quick partition failed,
- // and is expected to happen very rarely.
-
- // Compute random sample of elements in the buffer.
- for (long t = 0; t < buffer_sample_size; ++t)
- samples[t] = temp[utils::random_long(0L, b->m_filled - 1)];
- std::sort(samples.begin(), samples.end());
- samples.erase(std::unique(samples.begin(), samples.end()), samples.end());
-
- // Compute bucket boundaries (lower bound is enough).
- std::fill(bucket_lbound, bucket_lbound + n_increasers + 1, gap_range_size);
-
- long step = (samples.size() + n_increasers - 1) / n_increasers;
- for (size_t t = 1, p = step; p < samples.size(); ++t, p += step)
- bucket_lbound[t] = (samples[p - 1] + samples[p] + 1) / 2;
- bucket_lbound[0] = 0;
-
- // Compute bucket sizes and sblock id into oracle array.
- std::fill(b->sblock_size, b->sblock_size + n_increasers, 0L);
- for (long t = 0; t < b->m_filled; ++t) {
- block_offset_type x = temp[t];
- int id = n_increasers;
- while (bucket_lbound[id] > x) --id;
- oracle[t] = id;
- b->sblock_size[id]++;
- }
-
- // Permute elements into their own buckets using oracle.
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++])
- b->sblock_beg[t] = ptr[t] = curbeg;
-
- for (long t = 0; t < b->m_filled; ++t) {
- long sblock_id = oracle[t];
- oracle[t] = ptr[sblock_id]++;
- }
-
- for (long t = 0; t < b->m_filled; ++t) {
- long addr = oracle[t];
- b->m_content[addr] = temp[t];
- }
- }
-
- // 2.d
- //
- // Add the buffer to the poll of full buffers and notify waiting thread.
- std::unique_lock<std::mutex> lk2(full_gap_buffers->m_mutex);
- full_gap_buffers->add(b);
- lk2.unlock();
- full_gap_buffers->m_cv.notify_one();
- }
-
- //---------------------------------------------------------------------------
- // STEP 3: Clean up.
- //---------------------------------------------------------------------------
-
- // Report that another thread has finished.
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex);
- full_gap_buffers->increment_finished_workers();
- lk.unlock();
-
- // Notify waiting update threads in case no more buffers
- // are going to be produced by streaming threads.
- full_gap_buffers->m_cv.notify_one();
-
- delete[] block_count;
- delete[] block_id_to_sblock_id;
- delete[] ptr;
- delete[] bucket_lbound;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_STREAM_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_update.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_update.h
deleted file mode 100644
index a4677d47..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/inmem_update.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/inmem_update.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_UPDATE_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_UPDATE_H_INCLUDED
-
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "../gap_buffer.h"
-#include "../utils.h"
-#include "inmem_gap_array.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-//==============================================================================
-// This object creates a given number of threads that will perform gap array
-// updates. Most of the time all threads are sleeping on a conditional variable.
-// Once the gap buffer is available for processing, they are all woken up and
-// perform the update in parallel. The caller then waits until all threads are
-// finished and then puts the gap buffer in the poll of empty buffers.
-//
-// Only one object of this class should exist.
-//==============================================================================
-template<typename block_offset_type>
-struct gap_parallel_updater {
-
- template<typename T>
- static void parallel_update(gap_parallel_updater<T> *updater, int id) {
- while (true) {
- // Wait until there is a gap buffer available or the
- // message 'no more buffers' arrives.
- std::unique_lock<std::mutex> lk(updater->m_avail_mutex);
- while (!(updater->m_avail[id]) && !(updater->m_avail_no_more))
- updater->m_avail_cv.wait(lk);
-
- if (!(updater->m_avail[id]) && updater->m_avail_no_more) {
- // No more buffers -- exit.
- lk.unlock();
- return;
- }
-
- updater->m_avail[id] = false;
- lk.unlock();
-
- // Safely perform the update.
- const gap_buffer<T> *buf = updater->m_buffer;
- inmem_gap_array *gap = updater->m_gap_array;
- int beg = buf->sblock_beg[id];
- int end = beg + buf->sblock_size[id];
-
- for (int i = beg; i < end; ++i) {
- T x = buf->m_content[i];
- gap->m_count[x]++;
-
- // Check if values wrapped-around.
- if (gap->m_count[x] == 0) {
- gap->m_excess_mutex.lock();
- gap->m_excess.push_back(x);
- gap->m_excess_mutex.unlock();
- }
- }
-
- // Update the number of finished threads.
- bool finished_last = false;
- std::unique_lock<std::mutex> lk2(updater->m_finished_mutex);
- updater->m_finished++;
- if (updater->m_finished == updater->m_threads_cnt)
- finished_last = true;
- lk2.unlock();
-
- // If this was the last thread finishing, let the caller know.
- if (finished_last)
- updater->m_finished_cv.notify_one();
- }
- }
-
- gap_parallel_updater(inmem_gap_array *gap_array, int threads_cnt)
- : m_gap_array(gap_array),
- m_threads_cnt(threads_cnt),
- m_avail_no_more(false) {
- m_avail = new bool[m_threads_cnt];
- std::fill(m_avail, m_avail + m_threads_cnt, false);
- m_threads = new std::thread*[m_threads_cnt];
-
- // After this, threads immediately hang up on m_avail_cv.
- for (int i = 0; i < m_threads_cnt; ++i)
- m_threads[i] = new std::thread(parallel_update<block_offset_type>, this, i);
- }
-
- ~gap_parallel_updater() {
- // Signal all threads to finish.
- std::unique_lock<std::mutex> lk(m_avail_mutex);
- m_avail_no_more = true;
- lk.unlock();
- m_avail_cv.notify_all();
-
- // Wait until all threads finish and release memory.
- for (int i = 0; i < m_threads_cnt; ++i) {
- m_threads[i]->join();
- delete m_threads[i];
- }
- delete[] m_threads;
- delete[] m_avail;
- }
-
- void update(const gap_buffer<block_offset_type> *buffer) {
- // Prepare a message for each thread that new buffer is available.
- std::unique_lock<std::mutex> lk(m_avail_mutex);
- m_finished = 0;
- m_buffer = buffer;
- for (int i = 0; i < m_threads_cnt; ++i)
- m_avail[i] = true;
- lk.unlock();
-
- // Wake up all threads to perform the update.
- m_avail_cv.notify_all();
-
- // Wait until all threads report that they are done.
- std::unique_lock<std::mutex> lk2(m_finished_mutex);
- while (m_finished != m_threads_cnt)
- m_finished_cv.wait(lk2);
- lk2.unlock();
-
- // We are done processing the buffer. The caller of this method
- // can now place the buffer into the poll of empty buffers.
- }
-
-private:
- inmem_gap_array *m_gap_array;
-
- std::thread **m_threads;
- int m_threads_cnt;
-
- const gap_buffer<block_offset_type> *m_buffer;
-
- // For notifying threads about available buffer.
- std::mutex m_avail_mutex;
- std::condition_variable m_avail_cv;
- bool *m_avail;
- bool m_avail_no_more;
-
- // The mutex below is to protect m_finished. The condition
- // variable allows the caller to wait (and to be notified when done)
- // until threads complete processing their section of the buffer.
- int m_finished;
- std::mutex m_finished_mutex;
- std::condition_variable m_finished_cv;
-};
-
-template<typename block_offset_type>
-void inmem_gap_updater(gap_buffer_poll<block_offset_type> *full_gap_buffers,
- gap_buffer_poll<block_offset_type> *empty_gap_buffers,
- inmem_gap_array *gap, long n_increasers) {
-
- gap_parallel_updater<block_offset_type> *updater =
- new gap_parallel_updater<block_offset_type>(gap, n_increasers);
-
- while (true) {
- // Get a buffer from the poll of full buffers.
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex);
- while (!full_gap_buffers->available() && !full_gap_buffers->finished())
- full_gap_buffers->m_cv.wait(lk);
-
- if (!full_gap_buffers->available() && full_gap_buffers->finished()) {
- // There will be no more full buffers -- exit.
- lk.unlock();
- break;
- }
-
- gap_buffer<block_offset_type> *b = full_gap_buffers->get();
- lk.unlock();
-
- // Process buffer.
- updater->update(b);
-
- // Add the buffer to the poll of empty buffers and notify
- // the waiting thread.
- std::unique_lock<std::mutex> lk2(empty_gap_buffers->m_mutex);
- empty_gap_buffers->add(b);
- lk2.unlock();
- empty_gap_buffers->m_cv.notify_one();
- }
-
- delete updater;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_INMEM_UPDATE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/merge_schedule.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/merge_schedule.h
deleted file mode 100644
index 868163eb..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/merge_schedule.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/merge_schedule.h
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_MERGE_SCHEDULE_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_MERGE_SCHEDULE_H_INCLUDED
-
-#include <iostream>
-#include <vector>
-#include <cassert>
-#include <cstdlib>
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-class MergeSchedule {
-private:
- float rl_ratio;
- std::vector<int> split;
- std::vector<int> left_cost;
- std::vector<int> right_cost;
-
-public:
- MergeSchedule(int no_of_blocks, float right_left_ratio,
- int max_left_size = 0)
- { reset(no_of_blocks, right_left_ratio, max_left_size); }
-
- int left_size(int n) const {
- assert(n < (long)split.size());
- return split[n];
- }
- int right_size(int n) const {
- assert(n < (long)split.size());
- return n - split[n];
- }
- float cost(int n) const {
- assert(n < (long)split.size());
- return (left_cost[n] + rl_ratio * right_cost[n]) / n;
- }
- float n_left_merges(int n) const {
- assert(n < (long)split.size());
- return left_cost[n] / (1.0*n);
- }
- float n_right_merges(int n) const {
- assert(n < (long)split.size());
- return right_cost[n] / (1.0*n);
- }
-
- void reset(int no_of_blocks, float right_left_ratio,
- int max_left_size = 0)
- {
- int n = no_of_blocks;
- rl_ratio = right_left_ratio;
- if (max_left_size == 0) {
- max_left_size = n-1;
- }
-
- split.resize(n+1);
- left_cost.resize(n+1);
- right_cost.resize(n+1);
-
- split[1] = 0;
- left_cost[1] = 0;
- right_cost[1] = 0;
-
- for (int i=2; i<=n; ++i) {
- //int min_l = std::min((i+1)/2, max_left_size);
- int max_l = std::min(i-1, max_left_size);
- float min_cost = 1E40;
- for (int l=1; l<=max_l; ++l) {
- int r = i-l;
- int l_cost = l + left_cost[l] + left_cost[r];
- int r_cost = r + right_cost[l] + right_cost[r];
- float total_cost = l_cost + rl_ratio * r_cost;
- if (total_cost < min_cost) {
- min_cost = total_cost;
- split[i] = l;
- left_cost[i] = l_cost;
- right_cost[i] = r_cost;
- }
- }
- }
- }
-};
-
-void print_schedule(const MergeSchedule & sched, int n, std::string indent) {
- if (n == 1) {
- std::cerr << "1\n";
- return;
- }
- std::cerr << n << "\t";
- int l = sched.left_size(n);
- print_schedule(sched, l, indent + ":\t");
- std::cerr << indent;
- print_schedule(sched, n-l, indent + "\t");
-}
-
-void print_schedule(const MergeSchedule & sched, int n) {
- std::string intend = "\t";
- print_schedule(sched, n, intend);
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_MERGE_SCHEDULE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/pagearray.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/pagearray.h
deleted file mode 100644
index 3c8fa829..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/pagearray.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/pagearray.h
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * The paged array representation, as described in Appending B of
- *
- * Juha Karkkainen, Peter Sanders, Stefan Burkhardt:
- * Linear work suffix array construction.
- * J. ACM 53(6), p. 918-936 (2006).
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PAGEARRAY_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PAGEARRAY_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <vector>
-#include <stack>
-#include <algorithm>
-#include <thread>
-#include <mutex>
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename T, unsigned k_pagesize_log = 12U>
-struct pagearray {
- static const unsigned pagesize_log = k_pagesize_log;
- static const unsigned pagesize = (1U << k_pagesize_log);
- static const unsigned pagesize_mask = (1U << k_pagesize_log) - 1;
-
- typedef T value_type;
- typedef pagearray<value_type, k_pagesize_log> pagearray_type;
-
- long m_length;
- long m_shift;
-
- value_type *m_origin;
- value_type **m_pageindex;
-
- // Initialize empty page array, possible it will be
- // a result of merging two page arrays.
- pagearray(value_type *origin, long length) {
- m_length = length;
- m_origin = origin;
- m_shift = (pagesize - m_length % pagesize) % pagesize;
-
- long n_pages = (m_length + pagesize - 1) / pagesize;
- m_pageindex = new value_type*[n_pages + 1];
- }
-
- // Build page array from plain array.
- pagearray(value_type *begin, value_type *end) {
- m_length = end - begin;
- m_origin = begin;
- m_shift = (pagesize - m_length % pagesize) % pagesize;
-
- long n_pages = (m_length + pagesize - 1) / pagesize;
- m_pageindex = new value_type*[n_pages + 1];
- for (long i = 0; i < n_pages; ++i)
- m_pageindex[i] = begin + i * pagesize - m_shift;
- }
-
- inline value_type &operator[] (long i) const {
- i += m_shift;
- return m_pageindex[i >> pagesize_log][i & pagesize_mask];
- }
-
- inline long get_page_offset(long i) const {
- i += m_shift;
- return (i & pagesize_mask);
- }
-
- inline long get_page_id(long i) const {
- i += m_shift;
- return (i >> pagesize_log);
- }
-
- inline long get_page_id(value_type *p) const {
- p += m_shift;
- return ((p - m_origin) >> pagesize_log);
- }
-
- inline bool owns_page(value_type *p) const {
- p += m_shift;
- return m_origin <= p && p < m_origin + m_length;
- }
-
- inline value_type *get_page_addr(long id) const {
- return m_origin + (id << pagesize_log) - m_shift;
- }
-
- inline bool fully_contained_page(value_type *p) const {
- p += m_shift;
- return (m_origin <= p && p + pagesize <= m_origin + m_length);
- }
-
- // Used only for testing.
- void random_shuffle() {
- long trimmed_length = m_length - m_length % pagesize;
- long n_full_pages = (trimmed_length / pagesize);
- for (long t = 0; t < 2 * n_full_pages; ++t) {
- long i = rand() % n_full_pages;
- long j = rand() % n_full_pages;
-
- // Swap the page content.
- for (long tt = 0; tt < pagesize; ++tt)
- std::swap(m_pageindex[i][tt], m_pageindex[j][tt]);
-
- // Update page index.
- std::swap(m_pageindex[i], m_pageindex[j]);
- }
- }
-
- ~pagearray() {
- if (m_pageindex)
- delete[] m_pageindex;
- }
-
- static void permute_to_plain_array_aux(pagearray_type &a,
- std::mutex *mutexes, long &selector, std::mutex &selector_mutex) {
- long n_pages = (a.m_length + pagesize - 1) / pagesize;
-
- // Invariant: at all times, index[i] for any i points
- // to content that should be placed at i-th page of tab.
- while (true) {
- // Find starting point on some cycle.
- long start;
- while (true) {
- // Get the candidate using selector.
- std::unique_lock<std::mutex> lk(selector_mutex);
- while (selector < n_pages && a.m_pageindex[selector] == a.get_page_addr(selector))
- ++selector;
-
- // Exit, if the selector does not give any candidate.
- if (selector == n_pages) {
- lk.unlock();
- return;
- }
-
- // Unlock selector lock, allow other threads
- // to look for candidates in the meantime.
- start = selector++;
- lk.unlock();
-
- // Lock a candidate page and check if it's still good.
- // If yes, keep lock and proceed to process it.
- if (mutexes[start].try_lock() && a.m_pageindex[start] != a.get_page_addr(start)) break;
- }
-
- // Invariant: we have found a good candidate
- // page and have lock on mutexes[start].
-
- // First, we create temporary space for the
- // content of page at index[start] and move
- // the content at index[start] to that temp space.
- value_type *temp = new value_type[pagesize];
- std::copy(a.m_pageindex[start], a.m_pageindex[start] + pagesize, temp);
- std::swap(a.m_pageindex[start], temp);
- mutexes[start].unlock();
-
- // We now have free space at temp. Keep placing there
- // elements from the cycle and moving temp pointer.
- do {
- // Invariant: temp points to a page inside tab.
- long next = a.get_page_id(temp);
- std::unique_lock<std::mutex> lk(mutexes[next]);
- std::copy(a.m_pageindex[next], a.m_pageindex[next] + pagesize, temp);
- std::swap(a.m_pageindex[next], temp);
- lk.unlock();
- } while (a.owns_page(temp));
- delete[] temp;
- }
- }
-
- void permute_to_plain_array(long max_threads) {
- long n_pages = (m_length + pagesize - 1) / pagesize;
- long selector = 0;
-
- std::mutex selector_mutex;
- std::mutex *mutexes = new std::mutex[n_pages];
- std::thread **threads = new std::thread*[max_threads];
-
- for (long i = 0; i < max_threads; ++i)
- threads[i] = new std::thread(permute_to_plain_array_aux,
- std::ref(*this), mutexes, std::ref(selector), std::ref(selector_mutex));
-
- for (long i = 0; i < max_threads; ++i) threads[i]->join();
- for (long i = 0; i < max_threads; ++i) delete threads[i];
- delete[] threads;
- delete[] mutexes;
- delete[] m_pageindex;
- m_pageindex = NULL;
- }
-};
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PAGEARRAY_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_copy.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_copy.h
deleted file mode 100644
index f6060750..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_copy.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/parallel_copy.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_COPY_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_COPY_H_INCLUDED
-
-#include <algorithm>
-#include <thread>
-
-#include "../uint40.h"
-#include "bwtsa.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename T, typename S>
-void parallel_copy_aux(const T *src, S *dest, long length) {
- for (long i = 0; i < length; ++i)
- dest[i] = (S)src[i];
-}
-
-// Specilization
-template<>
-void parallel_copy_aux(const bwtsa_t<uint40> *src, unsigned char *dest, long length) {
- for (long i = 0; i < length; ++i)
- dest[i] = src[i].bwt;
-}
-
-// Specilization
-template<>
-void parallel_copy_aux(const bwtsa_t<int> *src, unsigned char *dest, long length) {
- for (long i = 0; i < length; ++i)
- dest[i] = src[i].bwt;
-}
-
-
-// Conversion from T to S has to make sense.
-template<typename T, typename S>
-void parallel_copy(const T *src, S *dest, long length, long max_threads) {
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_blocks = (length + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(parallel_copy_aux<T, S>,
- src + block_beg, dest + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-}
-
-// Specialization
-template<>
-void parallel_copy(const bwtsa_t<uint40> *src, unsigned char *dest, long length, long max_threads) {
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_blocks = (length + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(parallel_copy_aux<bwtsa_t<uint40>, unsigned char>,
- src + block_beg, dest + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-}
-
-// Specialization
-template<>
-void parallel_copy(const bwtsa_t<int> *src, unsigned char *dest, long length, long max_threads) {
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_blocks = (length + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(parallel_copy_aux<bwtsa_t<int>, unsigned char>,
- src + block_beg, dest + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_expand.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_expand.h
deleted file mode 100644
index 6850eb4d..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_expand.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/parallel_expand.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_EXPAND_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_EXPAND_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <algorithm>
-#include <thread>
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename T, typename S>
-void parallel_expand_aux(const T *src, S *dest, long length) {
- for (long i = 0; i < length; ++i)
- dest[i] = (S)src[i];
-}
-
-// Requires sizeof(T) < sizeof(S).
-template<typename T, typename S>
-S *parallel_expand(T *tab, long length, long max_threads) {
- S *result = (S *)tab;
-
- long diff = (long)sizeof(S) - (long)sizeof(T);
- if (!diff) {
- fprintf(stderr, "Error: expanding requires sizeof(T) < sizeof(S)\n");
- std::exit(EXIT_FAILURE);
- }
-
- if (length < (1L << 20)) {
- // Move the elelements sequentially.
- for (long i = length - 1; i >= 0; --i)
- result[i] = (S)tab[i];
-
- return result;
- }
-
- // Compute the index of the smallest element (of type T)
- // that lies past the end of the last element of tab
- // (after converting all elements to type S).
- long bytes_before_expanding = length * sizeof(T);
- long split = (bytes_before_expanding + sizeof(S) - 1) / sizeof(S);
-
- // Move the elements in the range [split, length) in parallel.
- // This is safe (no element overwriting) because of how we
- // computed the split.
- long elems = length - split;
- long max_block_size = (elems + max_threads - 1) / max_threads;
- long n_blocks = (elems + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = split + i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(parallel_expand_aux<T, S>,
- tab + block_beg, result + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- // Recursively expand the first split elements.
- parallel_expand<T, S>(tab, split, max_threads);
-
- return result;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_merge.h
deleted file mode 100644
index c1c4a666..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_merge.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/parallel_merge.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * Parallel version of almost in-place stable merging described in
- * the Appending B of
- *
- * Juha Karkkainen, Peter Sanders, Stefan Burkhardt:
- * Linear work suffix array construction.
- * J. ACM 53(6), p. 918-936 (2006).
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_MERGE_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_MERGE_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <vector>
-#include <stack>
-#include <algorithm>
-#include <thread>
-#include <mutex>
-
-#include "../utils.h"
-#include "pagearray.h"
-#include "inmem_gap_array.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-//==============================================================================
-// Compute the range [res_beg..res_beg+res_size) of the output (i.e., the
-// sequence after merging). The range is guaranteed to be aligned with page
-// boundaries.
-//==============================================================================
-template<typename pagearray_type>
-void parallel_merge_aux(
- const pagearray_type *l_pagearray,
- const pagearray_type *r_pagearray,
- pagearray_type *output,
- const inmem_gap_array *gap,
- long left_idx, long right_idx,
- long remaining_gap,
- long page_range_beg,
- long page_range_end,
- long what_to_add) {
-
- typedef typename pagearray_type::value_type value_type;
- static const unsigned pagesize = pagearray_type::pagesize;
-
- long res_beg = std::max(0L, output->get_page_addr(page_range_beg) - output->m_origin);
- long res_end = std::min(output->m_length, output->get_page_addr(page_range_end) - output->m_origin);
- long res_size = res_end - res_beg;
-
- long lpage_read = 0L;
- long lpage_id = l_pagearray->get_page_id(left_idx);
- long lpage_offset = l_pagearray->get_page_offset(left_idx);
- value_type *lpage = l_pagearray->m_pageindex[lpage_id++];
-
- long rpage_read = 0L;
- long rpage_id = r_pagearray->get_page_id(right_idx);
- long rpage_offset = r_pagearray->get_page_offset(right_idx);
- value_type *rpage = r_pagearray->m_pageindex[rpage_id++];
-
- long pageid = output->get_page_id(res_beg);
- long filled = output->get_page_offset(res_beg);
- value_type *dest = new value_type[pagesize];
- output->m_pageindex[pageid++] = dest;
-
- std::stack<value_type*> freepages;
- size_t excess_ptr = std::lower_bound(gap->m_excess.begin(),
- gap->m_excess.end(), left_idx + 1) - gap->m_excess.begin();
-
- for (long i = 0; i < res_size; ++i) {
- if (filled == pagesize) {
- if (freepages.empty()) dest = new value_type[pagesize];
- else { dest = freepages.top(); freepages.pop(); }
- output->m_pageindex[pageid++] = dest;
- filled = 0L;
- }
- if (remaining_gap > 0) {
- --remaining_gap;
- // The next element comes from the right subarray.
- dest[filled] = rpage[rpage_offset++];
- dest[filled++].sa += what_to_add;
- rpage_read++;
- if (rpage_offset == pagesize) {
- // We reached the end of page in the right subarray.
- // We put it into free pages if we read exactly
- // pagesize elements from it. This means the no other
- // thread will attemp to read from it in the future.
- if (rpage_read == pagesize) freepages.push(r_pagearray->m_pageindex[rpage_id - 1]);
-
- // Note: we don't have to check, if the page below exists, because we have
- // a sentinel page in the page index of every pagearray.
- rpage = r_pagearray->m_pageindex[rpage_id++];
- rpage_offset = 0L;
- rpage_read = 0L;
- }
- } else {
- // Next elem comes from the left subarray.
- dest[filled++] = lpage[lpage_offset++];
- left_idx++;
- lpage_read++;
-
- // Compute gap[left_idx].
- long gap_left_idx = gap->m_count[left_idx];
- while (excess_ptr < gap->m_excess.size() &&
- gap->m_excess[excess_ptr] == left_idx) {
- gap_left_idx += 256L;
- ++excess_ptr;
- }
-
- remaining_gap = gap_left_idx;
- if (lpage_offset == pagesize) {
- // We reached the end of page in the left
- // subarray, proceed analogously.
- if (lpage_read == pagesize) freepages.push(l_pagearray->m_pageindex[lpage_id - 1]);
-
- // Note: we don't have to check, if the page below exists, because we have
- // a sentinel page in the page index of every pagearray.
- lpage = l_pagearray->m_pageindex[lpage_id++];
- lpage_offset = 0L;
- lpage_read = 0L;
- }
- }
- }
-
- // Release the unused auxiliary pages.
- while (!freepages.empty()) {
- value_type* p = freepages.top();
- freepages.pop();
- if (!output->owns_page(p))
- delete[] p;
- }
-}
-
-template<typename pagearray_type>
-pagearray_type *parallel_merge(pagearray_type *l_pagearray,
- pagearray_type *r_pagearray, const inmem_gap_array *gap, long max_threads,
- long i0, long &aux_result, long what_to_add) {
- static const unsigned pagesize_log = pagearray_type::pagesize_log;
- static const unsigned pagesize = pagearray_type::pagesize;
- typedef typename pagearray_type::value_type value_type;
- typedef pagearray<value_type, pagesize_log> output_type;
-
- //----------------------------------------------------------------------------
- // STEP 1: compute the initial parameters for each thread.
- //----------------------------------------------------------------------------
- fprintf(stderr, "queries: ");
- long double start = utils::wclock();
- long length = l_pagearray->m_length + r_pagearray->m_length;
- long n_pages = (length + pagesize - 1) / pagesize;
- long pages_per_thread = (n_pages + max_threads - 1) / max_threads;
- long n_threads = (n_pages + pages_per_thread - 1) / pages_per_thread;
- output_type *result = new output_type(l_pagearray->m_origin, length);
-
- long *left_idx = new long[n_threads];
- long *right_idx = new long[n_threads];
- long *remaining_gap = new long[n_threads];
-
- // Prepare gap queries.
- long *gap_query = new long[n_threads];
- long *gap_answer_a = new long[n_threads];
- long *gap_answer_b = new long[n_threads];
- for (long i = 0; i < n_threads; ++i) {
- long page_range_beg = i * pages_per_thread;
- long res_beg = std::max(0L, result->get_page_addr(page_range_beg) - result->m_origin);
- gap_query[i] = res_beg;
- }
-
- // Answer these queries in parallel and convert the answers
- // to left_idx, right_idx and remaining_gap values.
- aux_result = gap->answer_queries(n_threads, gap_query, gap_answer_a, gap_answer_b, max_threads, i0);
- for (long i = 0; i < n_threads; ++i) {
- long page_range_beg = i * pages_per_thread;
- long res_beg = std::max(0L, result->get_page_addr(page_range_beg) - result->m_origin);
- long j = gap_answer_a[i], s = gap_answer_b[i];
- left_idx[i] = j;
- right_idx[i] = res_beg - j;
- remaining_gap[i] = j + s - res_beg;
- }
- delete[] gap_query;
- delete[] gap_answer_a;
- delete[] gap_answer_b;
- fprintf(stderr, "%.2Lf ", utils::wclock() - start);
-
- //----------------------------------------------------------------------------
- // STEP 2: merge the arrays.
- //----------------------------------------------------------------------------
- fprintf(stderr, "merge: ");
- start = utils::wclock();
-
- std::thread **threads = new std::thread*[n_threads];
- for (long t = 0; t < n_threads; ++t) {
- long page_range_beg = t * pages_per_thread;
- long page_range_end = std::min(page_range_beg + pages_per_thread, n_pages);
-
- threads[t] = new std::thread(parallel_merge_aux<pagearray_type>,
- l_pagearray, r_pagearray, result, gap, left_idx[t], right_idx[t],
- remaining_gap[t], page_range_beg, page_range_end, what_to_add);
- }
- for (long t = 0; t < n_threads; ++t) threads[t]->join();
- for (long t = 0; t < n_threads; ++t) delete threads[t];
- delete[] threads;
- delete[] left_idx;
- delete[] right_idx;
- delete[] remaining_gap;
-
- bool *usedpage = new bool[n_pages];
- std::fill(usedpage, usedpage + n_pages, false);
-
- // Handle the page that was not full
- // manually (if there was one).
- if (length % pagesize) {
- long size = length % pagesize;
- value_type *src = result->m_pageindex[0];
- value_type *dest = result->get_page_addr(0);
- std::copy(src + pagesize - size, src + pagesize, dest + pagesize - size);
- result->m_pageindex[0] = dest;
- usedpage[0] = true;
-
- // Release the lastpage if it was temporary.
- if (!result->owns_page(src))
- delete[] src;
- }
-
- // Find unused input pages.
- std::vector<std::pair<long, value_type*> > auxpages;
- for (long i = 0; i < n_pages; ++i) {
- value_type *p = result->m_pageindex[i];
- if (result->owns_page(p)) usedpage[result->get_page_id(p)] = true;
- else auxpages.push_back(std::make_pair(i, p));
- }
-
- // Assign aux pages to unused pages in any
- // order and release them (aux pages).
- for (long i = 0, ptr = 0; i < n_pages; ++i) {
- if (!usedpage[i]) {
- long id = auxpages[ptr].first;
- value_type *src = auxpages[ptr++].second;
- value_type *dest = result->get_page_addr(i);
- std::copy(src, src + pagesize, dest);
- result->m_pageindex[id] = dest;
- delete[] src;
- }
- }
- delete[] usedpage;
- fprintf(stderr, "%.2Lf ", utils::wclock() - start);
-
- return result;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_MERGE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_shrink.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_shrink.h
deleted file mode 100644
index 0283c600..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/parallel_shrink.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/parallel_shrink.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED
-
-#include <algorithm>
-#include <thread>
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename T, typename S>
-void parallel_shrink_aux(T *src, S *dest, long length) {
- for (long i = 0; i < length; ++i)
- dest[i] = (S)src[i];
-}
-
-
-// Requires sizeof(T) > sizeof(S).
-template<typename T, typename S>
-S *parallel_shrink(T *tab, long length, long max_threads) {
- S *result = (S *)tab;
-
- long diff = (long)sizeof(T) - (long)sizeof(S);
- if (!diff) {
- fprintf(stderr, "Error: shrinking requires sizeof(T) > sizeof(S)\n");
- std::exit(EXIT_FAILURE);
- }
-
- // long threshold = (sizeof(T) + diff - 1) / diff;
- if (length < (1L << 20)/*threshold*/) {
- // Move the elelements sequentially.
- for (long i = 0; i < length; ++i)
- result[i] = (S)tab[i];
-
- return result;
- }
-
- // Compute the index of the smallest element (of type T)
- // that lies past the end of the last element of tab
- // (after converting all elemeents to type S).
- long bytes_after_shrinking = length * sizeof(S);
- long split = (bytes_after_shrinking + sizeof(T) - 1) / sizeof(T);
-
- // Recursively shrink the part up to (but excluding) split.
- parallel_shrink<T, S>(tab, split, max_threads);
-
- // Move the elements in the range [split, length) in parallel.
- // This is safe (no element overwriting) because of how we
- // computed the split.
- long elems = length - split;
- long max_block_size = (elems + max_threads - 1) / max_threads;
- long n_blocks = (elems + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = split + i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(parallel_shrink_aux<T, S>,
- tab + block_beg, result + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-
- return result;
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_PARALLEL_SHRINK_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/rank.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/rank.h
deleted file mode 100644
index 486eb8e5..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/rank.h
+++ /dev/null
@@ -1,798 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/rank.h
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * A general rank data structure. Basic idea of the encoding is from
- * the rank data structure used in the external-memory algorithm for
- * constructing the Burrows-Wheeler transform called bwtdisk (available
- * at: http://people.unipmn.it/manzini/bwtdisk/) described in [1]. We
- * extended the data structure by applying the fixed block boosting [2]
- * and alphabet partitioning [3] techniques. The resulting data structure
- * was described in [4]. This file extends the implementation used in [4]
- * by parallelizing the construction and introducting an alternative
- * encoding (called type-I in the code). Type-I encoding is a novel
- * encoding due to present authors.
- *
- * References:
- * [1] Paolo Ferragina, Travis Gagie, Giovanni Manzini:
- * Lightweight Data Indexing and Compression in External Memory.
- * Algorithmica 63(3), p. 707-730 (2012).
- * [2] Juha Karkkainen, Simon J. Puglisi:
- * Fixed Block Compression Boosting in FM-Indexes.
- * In Proc. SPIRE 2011, p. 174-184.
- * [3] Jeremy Barbay, Travis Gagie, Gonzalo Navarro, Yakov Nekrich:
- * Alphabet Partitioning for Compressed Rank/Select and Applications.
- * In Proc. ISAAC 2010, p. 315-326.
- * [4] Juha Karkkainen, Dominik Kempa:
- * Engineering a Lightweight External Memory Suffix Array Construction
- * Algorithm.
- * In Proc. ICABD 2014, p. 53-60.
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_RANK_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_RANK_H_INCLUDED
-
-#include <cstdio>
-#include <algorithm>
-#include <vector>
-#include <thread>
-
-#include "../utils.h"
-#include "bwtsa.h"
-#include "pagearray.h"
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<
- typename saidx_t,
- unsigned pagesize_log,
- unsigned k_sblock_size_log = 24,
- unsigned k_cblock_size_log = 20,
- unsigned k_sigma_log = 8>
-class rank4n {
- private:
- typedef pagearray<bwtsa_t<saidx_t>, pagesize_log> pagearray_type;
-
- static const unsigned long k_cblock_size;
- static const unsigned long k_cblock_size_mask;
- static const unsigned long k_cblock_size_mask_neg;
- static const unsigned k_cblocks_in_sblock_log;
- static const unsigned k_cblocks_in_sblock;
- static const unsigned k_cblocks_in_sblock_mask;
- static const unsigned k_2cblock_size;
- static const unsigned k_2cblock_size_mask;
- static const unsigned k_sblock_size;
- static const unsigned k_sblock_size_mask;
- static const unsigned k_sigma;
- static const unsigned k_sigma_mask;
-
- static const unsigned pagesize = (1U << pagesize_log);
- static const unsigned pagesize_mask = (1U << pagesize_log) - 1;
-
- static const unsigned k_char_type_freq = 0x01;
- static const unsigned k_char_type_rare = 0x02;
- static const unsigned k_char_type_missing = 0x03;
-
- unsigned long m_length; // length of original sequence
- unsigned long n_cblocks; // number of context blocks
- unsigned long n_sblocks; // number of super blocks
-
- unsigned long *m_sblock_header;
- unsigned long *m_cblock_header;
- unsigned long *m_cblock_header2;
-
- unsigned char *m_cblock_type;
- unsigned char *m_cblock_mapping;
-
- unsigned *m_freq_trunk;
- unsigned *m_rare_trunk;
-
- public:
- unsigned long *m_count; // symbol counts
-
- public:
- rank4n(const pagearray_type *ptext, unsigned long length, unsigned max_threads) {
- m_length = length;
- n_cblocks = (m_length + k_cblock_size - 1) / k_cblock_size;
- n_sblocks = (n_cblocks + k_cblocks_in_sblock - 1) / k_cblocks_in_sblock;
-
- m_count = (unsigned long *)malloc(256L * sizeof(unsigned long));
- std::fill(m_count, m_count + 256, 0UL);
- if (!m_length) return;
-
- long double start = utils::wclock();
- m_sblock_header = (unsigned long *)malloc(n_sblocks * sizeof(unsigned long) * k_sigma);
- m_cblock_header = (unsigned long *)malloc(n_cblocks * sizeof(unsigned long));
- m_cblock_header2 = (unsigned long *)malloc(n_cblocks * k_sigma * sizeof(unsigned long));
- m_cblock_mapping = (unsigned char *)malloc(n_cblocks * k_sigma * 2);
- m_cblock_type = (unsigned char *)malloc((n_cblocks + 7) / 8);
- m_freq_trunk = (unsigned *)calloc(n_cblocks * k_cblock_size, sizeof(unsigned));
- std::fill(m_cblock_type, m_cblock_type + (n_cblocks + 7) / 8, 0);
- unsigned char *bwt = (unsigned char *)malloc(length + k_cblock_size);
- long double alloc_time = utils::wclock() - start;
- if (alloc_time > 0.05L)
- fprintf(stderr, "alloc: %.2Lf ", alloc_time);
-
- encode_type_I(ptext, bwt, max_threads);
- encode_type_II(bwt, max_threads);
-
- m_count[0] -= n_cblocks * k_cblock_size - m_length; // remove extra zeros
- free(bwt);
- }
-
- void encode_type_I(const pagearray_type *ptext, unsigned char *bwt,
- long max_threads) {
- //------------------------------------------------------------------------
- // STEP 1: split all cblocks into equal size ranges (except possible the
- // last one). Each range is processed by one thread. During this
- // step we compute: (i) type of each cblock, (ii) encode all
- // type-I cblocks and for all type-II cblocks, we compute and
- // store: symbol mapping, symbol type (freq / rare / non-occurring)
- // and values of freq_cnt_log and rare_cnt_log.
- //------------------------------------------------------------------------
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads;
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size;
-
- unsigned long *rare_trunk_size = new unsigned long[n_cblocks];
- std::fill(rare_trunk_size, rare_trunk_size + n_cblocks, 0);
-
- bool *cblock_type = new bool[n_cblocks];
- std::fill(cblock_type, cblock_type + n_cblocks, 0);
-
- unsigned **occ = (unsigned **)malloc(n_ranges * sizeof(unsigned *));
- for (unsigned long i = 0; i < n_ranges; ++i)
- occ[i] = (unsigned *)malloc((k_cblock_size + 1) * sizeof(unsigned));
-
- fprintf(stderr, "s1: ");
- long double start = utils::wclock();
- std::thread **threads = new std::thread*[n_ranges];
- for (unsigned long i = 0; i < n_ranges; ++i) {
- unsigned long range_beg = i * range_size;
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks);
-
- threads[i] = new std::thread(encode_type_I_aux, std::ref(*this),
- ptext, range_beg, range_end, rare_trunk_size, cblock_type, occ[i], bwt);
- }
-
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join();
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i];
- delete[] threads;
-
- for (unsigned long i = 0; i < n_ranges; ++i)
- free(occ[i]);
- free(occ);
-
- fprintf(stderr, "%.2Lf ", utils::wclock() - start);
-
-
- //------------------------------------------------------------------------
- // STEP 2: compute global information based on local cblock computation:
- // * store cblock types,
- // * total size of rare trunk,
- // * pointers to the beginning of each rare trunk,
- // * cumulative counts of all symbols,
- // * non-inclusive partial sum over cblock range counts.
- //------------------------------------------------------------------------
- fprintf(stderr, "s2: ");
- start = utils::wclock();
- unsigned long rare_trunk_total_size = 0;
- for (unsigned long cblock_id = 0; cblock_id < n_cblocks; ++cblock_id) {
- unsigned long cblock_beg = (cblock_id << k_cblock_size_log);
-
- // 1
- // Store cblock type.
- if (cblock_type[cblock_id])
- m_cblock_type[cblock_id >> 3] |= (1 << (cblock_id & 7));
-
- // 2
- // Compute the pointer to rare trunk and update total rare trunk size.
- unsigned long this_cblock_rare_trunk_size = rare_trunk_size[cblock_id];
- m_cblock_header[cblock_id] |= (rare_trunk_total_size << 16);
- rare_trunk_total_size += this_cblock_rare_trunk_size;
-
- // 3
- // Update cblock header.
- unsigned long cblock_header_beg = (cblock_id << k_sigma_log);
- for (unsigned c = 0; c < k_sigma; ++c)
- m_cblock_header2[cblock_header_beg + c] |= (m_count[c] << (k_cblock_size_log + 6));
-
- // 4
- // Update sblock header,
- if (!(cblock_beg & k_sblock_size_mask)) {
- unsigned long sblock_id = (cblock_beg >> k_sblock_size_log);
- unsigned long sblock_header_beg = (sblock_id << k_sigma_log);
- for (unsigned c = 0; c < k_sigma; ++c)
- m_sblock_header[sblock_header_beg + c] = m_count[c];
- }
-
- // 5
- // Update m_count.
- unsigned long ptr = (cblock_id << k_sigma_log);
- for (unsigned c = 0; c + 1 < k_sigma; ++c)
- m_count[c] += ((m_cblock_header2[ptr + c + 1] >> 5) & k_2cblock_size_mask) -
- ((m_cblock_header2[ptr + c] >> 5) & k_2cblock_size_mask);
- m_count[k_sigma - 1] += k_cblock_size -
- ((m_cblock_header2[ptr + k_sigma - 1] >> 5) & k_2cblock_size_mask);
- }
- m_rare_trunk = (unsigned *)calloc(rare_trunk_total_size, sizeof(unsigned));
-
- delete[] cblock_type;
- delete[] rare_trunk_size;
-
- fprintf(stderr, "%.2Lf ", utils::wclock() - start);
- }
-
- static void encode_type_I_aux(rank4n &r, const pagearray_type *ptext,
- unsigned long cblock_range_beg, unsigned long cblock_range_end,
- unsigned long *rare_trunk_size, bool *cblock_type, unsigned *occ, unsigned char *bwt) {
- std::vector<std::pair<uint32_t, unsigned char> > sorted_chars;
- std::vector<unsigned char> freq_chars;
- std::vector<unsigned char> rare_chars;
-
- unsigned *refpoint_precomputed = (unsigned *)malloc(k_cblock_size * sizeof(unsigned));
- unsigned *cblock_count = new unsigned[k_sigma];
- unsigned *list_beg = new unsigned[k_sigma];
- unsigned *list_beg2 = new unsigned[k_sigma];
- bool *isfreq = new bool[k_sigma];
- unsigned *lookup_bits_precomputed = new unsigned[k_sigma];
- unsigned *min_block_size_precomputed = new unsigned[k_sigma];
- unsigned long *refpoint_mask_precomputed = new unsigned long[k_sigma];
-
- typedef typename pagearray_type::value_type value_type;
-
- // Process cblocks one by one.
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) {
- unsigned long cblock_beg = cblock_id << k_cblock_size_log;
- unsigned long cblock_end = cblock_beg + k_cblock_size;
-
- // Compute symbol counts inside cblock and store bwt symbols.
- std::fill(cblock_count, cblock_count + k_sigma, 0);
- unsigned long maxj = std::min(cblock_end, r.m_length);
- unsigned long page_id = (cblock_beg >> pagesize_log);
- value_type *cur_page = ptext->m_pageindex[page_id++];
- unsigned long page_offset = ptext->get_page_offset(cblock_beg);
- for (unsigned long j = cblock_beg; j < maxj; ++j) {
- unsigned char c = cur_page[page_offset].bwt;
- bwt[j] = c;
- ++cblock_count[c];
- ++page_offset;
- if (page_offset == pagesize) {
- cur_page = ptext->m_pageindex[page_id];
- ++page_id;
- page_offset = 0;
- }
- }
- for (unsigned long j = maxj; j < cblock_end; ++j) {
- bwt[j] = 0;
- ++cblock_count[0];
- }
-
-
- // Compute starting positions of occurrences lists.
- for (unsigned j = 0, t, s = 0; j < k_sigma; ++j) {
- t = cblock_count[j];
- list_beg[j] = s;
- list_beg2[j] = s;
- s += t;
- }
-
- // Store pointers to beginnings of occurrence lists in the type-I
- // cblock header. Note: this implicitly encodes cblock counts.
- for (unsigned c = 0; c < k_sigma; ++c)
- r.m_cblock_header2[(cblock_id << k_sigma_log) + c] = (list_beg[c] << 5);
-
- // Sort symbol counts by frequencies.
- sorted_chars.clear();
- for (unsigned j = 0; j < k_sigma; ++j)
- if (cblock_count[j])
- sorted_chars.push_back(std::make_pair(cblock_count[j], j));
- std::sort(sorted_chars.begin(), sorted_chars.end());
-
- // Separate (at most, due to rounding of freq_cnt)
- // about 3% of rarest symbols.
- unsigned rare_cnt = 0L, rare_sum = 0L;
- while (rare_cnt < sorted_chars.size() &&
- 16L * (rare_sum + sorted_chars[rare_cnt].first) <= k_cblock_size)
- rare_sum += sorted_chars[rare_cnt++].first;
-
- // Compute freq_cnt. Then round up freq_cnt + 1 (+1 is
- // for rare char marker) to the smallest power of two.
- // Note: rare_cnt > 0, so after rounding freq_cnt <= 256.
- unsigned freq_cnt = sorted_chars.size() - rare_cnt;
- unsigned freq_cnt_log = utils::log2ceil(freq_cnt + 1);
- freq_cnt = (1 << freq_cnt_log);
-
- // Recompute rare_cnt (note the +1).
- rare_cnt = 0;
- if (sorted_chars.size() + 1 > freq_cnt)
- rare_cnt = sorted_chars.size() + 1 - freq_cnt;
-
- // Compute freq and rare chars.
- rare_chars.clear();
- freq_chars.clear();
- for (unsigned i = 0; i < rare_cnt; ++i)
- rare_chars.push_back(sorted_chars[i].second);
- for (unsigned i = rare_cnt; i < sorted_chars.size(); ++i)
- freq_chars.push_back(sorted_chars[i].second);
-
- // If there are rare symbols, round up
- // rare_cnt to the smallest power of two.
- unsigned rare_cnt_log = 0;
- if (rare_cnt) {
- rare_cnt_log = utils::log2ceil(rare_cnt);
- rare_cnt = (1 << rare_cnt_log);
- }
-
- // Update cblock type-I header.
- r.m_cblock_header[cblock_id] = freq_cnt_log;
- r.m_cblock_header[cblock_id] |= (rare_cnt_log << 8);
-
- // Compute and store symbols mapping.
- std::sort(freq_chars.begin(), freq_chars.end());
- std::sort(rare_chars.begin(), rare_chars.end());
- std::fill(isfreq, isfreq + 256, false);
- for (unsigned c = 0; c < 256; ++c)
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_missing;
- for (unsigned i = 0; i < freq_chars.size(); ++i) {
- unsigned char c = freq_chars[i];
- isfreq[c] = true;
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i;
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_freq;
- }
- for (unsigned i = 0; i < rare_chars.size(); ++i) {
- unsigned char c = rare_chars[i];
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i;
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_rare;
- }
-
- unsigned nofreq_cnt = 0L;
- for (unsigned c = 0; c < k_sigma; ++c)
- if (!isfreq[c]) nofreq_cnt += cblock_count[c];
-
-
- if (freq_cnt >= 128) { // type-I cblock
- cblock_type[cblock_id] = true;
-
- // Compute lists of occurrences.
- for (unsigned long i = cblock_beg; i < cblock_end; ++i)
- occ[list_beg2[bwt[i]]++] = i - cblock_beg;
-
- // Precompute helper arrays and and store lookup bits into the header.
- for (unsigned c = 0; c < k_sigma; ++c) {
- lookup_bits_precomputed[c] = utils::log2ceil(cblock_count[c] + 2);
- r.m_cblock_header2[(cblock_id << 8) + c] |= lookup_bits_precomputed[c];
- if (cblock_count[c])
- min_block_size_precomputed[c] = k_cblock_size / cblock_count[c];
- else min_block_size_precomputed[c] = 0;
-
- unsigned refpoint_dist_log = 31 - lookup_bits_precomputed[c];
- unsigned long refpoint_dist = (1UL << refpoint_dist_log);
- unsigned long refpoint_dist_mask = refpoint_dist - 1;
- unsigned long refpoint_dist_mask_neg = (~refpoint_dist_mask);
- refpoint_mask_precomputed[c] = refpoint_dist_mask_neg;
- }
-
- // Actual encoding follows.
- unsigned *cblock_trunk = r.m_freq_trunk + cblock_beg;
- for (unsigned c = 0; c < k_sigma; ++c) {
- unsigned freq = cblock_count[c];
- unsigned min_block_size = min_block_size_precomputed[c];
- unsigned lookup_bits = lookup_bits_precomputed[c];
- unsigned refpoint_dist_mask_neg = refpoint_mask_precomputed[c];
- unsigned c_list_beg = list_beg[c];
-
- for (unsigned j = 0; j < freq; ++j)
- cblock_trunk[c_list_beg + j] = freq + 1;
- if (freq) cblock_trunk[c_list_beg + freq - 1] = freq;
-
- unsigned block_beg = 0;
- for (unsigned j = 0; j < freq; ++j) {
- refpoint_precomputed[j] = (block_beg & refpoint_dist_mask_neg);
- block_beg += min_block_size;
- if ((((unsigned long)block_beg * freq) >> k_cblock_size_log) == j) ++block_beg;
- }
-
- unsigned refpoint, block_id;
- unsigned mask = (~((1UL << lookup_bits) - 1));
- if (freq) {
- for (long j = freq - 1; j >= 0; --j) {
- block_id = (((unsigned long)occ[c_list_beg + j] * freq) >> k_cblock_size_log);
- refpoint = refpoint_precomputed[block_id];
- cblock_trunk[c_list_beg + block_id] &= mask;
- cblock_trunk[c_list_beg + block_id] |= (unsigned)j;
- cblock_trunk[c_list_beg + j] |= ((occ[c_list_beg + j] - refpoint) << lookup_bits);
- }
- }
- }
- } else {
- // Update rare_trunk_size.
- if (rare_cnt) {
- long rare_blocks = 1 + (nofreq_cnt + rare_cnt - 1) / rare_cnt;
- rare_trunk_size[cblock_id] = rare_blocks * rare_cnt;
- }
- }
- }
-
- // Clean up.
- delete[] list_beg;
- delete[] list_beg2;
- delete[] isfreq;
- delete[] cblock_count;
- delete[] lookup_bits_precomputed;
- delete[] min_block_size_precomputed;
- delete[] refpoint_mask_precomputed;
- free(refpoint_precomputed);
- }
-
- void encode_type_II(const unsigned char *bwt, long max_threads) {
- fprintf(stderr, "s3: ");
- long double start = utils::wclock();
-
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads;
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size;
-
- std::thread **threads = new std::thread*[n_ranges];
- for (unsigned long i = 0; i < n_ranges; ++i) {
- unsigned long range_beg = i * range_size;
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks);
-
- threads[i] = new std::thread(encode_type_II_aux,
- std::ref(*this), range_beg, range_end, bwt);
- }
-
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join();
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i];
- delete[] threads;
-
- fprintf(stderr, "%.2Lf ", utils::wclock() - start);
- }
-
- static void encode_type_II_aux(rank4n &r, unsigned long cblock_range_beg,
- unsigned long cblock_range_end, const unsigned char *bwt) {
- unsigned char *freq_map = new unsigned char[k_sigma];
- unsigned char *rare_map = new unsigned char[k_sigma];
- unsigned long *cur_count = new unsigned long[k_sigma];
- unsigned long *off = new unsigned long[k_sigma];
-
- long *sblock_h = new long[k_sigma];
- int *israre = new int[k_sigma];
-
- std::vector<unsigned char> freq_chars;
- std::vector<unsigned char> rare_chars;
-
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) {
- unsigned long cblock_beg = cblock_id << k_cblock_size_log;
- unsigned long cblock_end = cblock_beg + k_cblock_size;
-
- // Skip the cblock if it was type-I encoded.
- if (r.m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) continue;
-
- // Retreive symbol counts up to this cblock begin and
- // pointer to rare trunk size from cblock headers.
- for (unsigned c = 0; c < k_sigma; ++c)
- cur_count[c] = (r.m_cblock_header2[(cblock_id << 8) + c] >> (k_cblock_size_log + 6));
-
- long r_filled = (r.m_cblock_header[cblock_id] >> 16);
- long r_ptr = r_filled;
-
- long freq_cnt_log = (r.m_cblock_header[cblock_id] & 255L);
- long rare_cnt_log = ((r.m_cblock_header[cblock_id] >> 8) & 255L);
- long freq_cnt = (1L << freq_cnt_log);
- long rare_cnt = (1L << rare_cnt_log);
- long rare_cnt_mask = rare_cnt - 1;
-
- freq_chars.clear();
- rare_chars.clear();
- std::fill(israre, israre + k_sigma, 1);
- for (unsigned c = 0; c < k_sigma; ++c) {
- unsigned char type = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)];
- if (type == k_char_type_freq) {
- israre[c] = 0;
- freq_chars.push_back(c);
- freq_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1];
- } else if (type == k_char_type_rare) {
- rare_chars.push_back(c);
- rare_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1];
- freq_map[c] = freq_cnt - 1;
- }
- }
-
- if (rare_chars.empty()) {
- rare_cnt_log = 0;
- rare_cnt = 0;
- }
-
- long sblock_id = (cblock_beg >> k_sblock_size_log);
- std::copy(r.m_sblock_header + (sblock_id << 8), r.m_sblock_header + (sblock_id << 8) + k_sigma, sblock_h);
- for (long j = 0; j < k_sigma; ++j) off[j] = cur_count[j] - sblock_h[j];
-
- long nofreq_cnt = 0;
- long freq_chars_size = (long)freq_chars.size();
- long rare_chars_size = (long)rare_chars.size();
- for (unsigned long i = cblock_beg; i < cblock_end; i += freq_cnt) {
- for (long j = 0; j < freq_chars_size; ++j) {
- unsigned char ch = freq_chars[j];
- r.m_freq_trunk[i + j] = (off[ch] << 8);
- }
- r.m_freq_trunk[i + freq_cnt - 1] = (nofreq_cnt << 8);
- for (unsigned long j = i; j < i + freq_cnt; ++j) {
- unsigned char c = bwt[j];
- r.m_freq_trunk[j] |= freq_map[c];
- if (israre[c]) {
- if (!(nofreq_cnt & rare_cnt_mask)) {
- for (long jj = 0; jj < rare_chars_size; ++jj) {
- unsigned char ch = rare_chars[jj];
- r.m_rare_trunk[r_filled++] = (off[ch] << 8);
- }
- r_filled += rare_cnt - rare_chars_size;
- }
- r.m_rare_trunk[r_ptr++] |= rare_map[c];
- }
- ++off[c];
- nofreq_cnt += israre[c];
- }
- }
- for (long i = 0; i < k_sigma; ++i)
- cur_count[i] = sblock_h[i] + off[i];
-
- for (long j = 0; j < rare_cnt; ++j) {
- unsigned char ch = (j < (long)rare_chars.size() ? rare_chars[j] : 0);
- long local_rank = cur_count[ch] - r.m_sblock_header[(sblock_id << 8) + ch];
- r.m_rare_trunk[r_filled++] = (local_rank << 8);
- }
- }
-
- delete[] cur_count;
- delete[] sblock_h;
- delete[] freq_map;
- delete[] rare_map;
- delete[] israre;
- delete[] off;
- }
-
- inline long rank(long i, unsigned char c) const {
- if (i <= 0) return 0L;
- else if ((unsigned long)i >= m_length) return m_count[c];
-
- unsigned long cblock_id = (i >> k_cblock_size_log);
- if (m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) { // type-I cblock
- long cblock_beg = (i & k_cblock_size_mask_neg);
- long cblock_i = (i & k_cblock_size_mask); // offset in cblock
-
- // Extract the rank up to the start of cblock.
- long rank_up_to_cblock = (m_cblock_header2[(cblock_id << k_sigma_log) + c] >> (k_cblock_size_log + 6));
-
- // Now we compute the number of occurrences of c inside the cblock.
- // First, decode the beginning and end of c's occurrence list.
- long list_beg = ((m_cblock_header2[(cblock_id << k_sigma_log) + c] >> 5) & k_2cblock_size_mask);
- long list_end = ((c == k_sigma - 1) ? k_cblock_size :
- ((m_cblock_header2[(cblock_id << k_sigma_log) + c + 1] >> 5) & k_2cblock_size_mask));
- if (list_beg == list_end) return rank_up_to_cblock;
-
- // Compute the distance from i to the closest reference point on the left.
- long lookup_bits = (m_cblock_header2[(cblock_id << k_sigma_log) + c] & 31);
- long refpoint_dist_log = 31 - lookup_bits;
- long refpoint_disk_mask = (1L << refpoint_dist_log) - 1;
- long i_refpoint_offset = (cblock_i & refpoint_disk_mask);
-
- // Compute threshold of symbol c inside the current cblock.
- long threshold = (1L << (k_cblock_size_log - lookup_bits + 1));
-
- // Compute the id of block containing i.
- long list_size = list_end - list_beg;
- long approx = ((cblock_i * list_size) >> k_cblock_size_log);
-
- // Extract the lookup table entry.
- long lookup_mask = (1L << lookup_bits) - 1;
- long begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask);
-
- // Empty block optimization.
- if (begin == list_size + 1) {
- // Block containing cblock_i is empty, just find the beginning.
- ++approx;
- while ((m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask) == list_size + 1) ++approx;
- begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask);
- return rank_up_to_cblock + begin;
- }
-
- long next_block_begin = (approx + 1 == list_size) ? list_size :
- (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask);
-
- // Correct next_block_begin.
- if (approx + 1 != list_size && next_block_begin == list_size + 1) {
- ++approx;
- while ((m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask) == list_size + 1) ++approx;
- next_block_begin = (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask);
- }
-
- // Correct the value of begin and return the answer.
- if (i_refpoint_offset >= threshold) {
- // Case 1: easy case, will happen most of the time.
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset)
- ++begin;
-
- return rank_up_to_cblock + begin;
- } else {
- // Case 2: executed very rarely.
- if (begin == next_block_begin || (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < (2 * threshold)) {
- // Case 2a: the value in the occ list was small -> the ref
- // point for i and for the block are the same, we
- // proceed as before, without modifying i_refpoint_offset.
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset)
- ++begin;
-
- return rank_up_to_cblock + begin;
- } else {
- // Case 2b: block occurrences were encoded wrt to the
- // previous ref point -> we increase i_refpoint_offset
- // by refpoint_dist and proceed as before.
- i_refpoint_offset += (1L << refpoint_dist_log);
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset)
- ++begin;
-
- return rank_up_to_cblock + begin;
- }
- }
- } else { // type-II cblock
- long sblock_id = (i >> k_sblock_size_log);
- long sblock_rank = m_sblock_header[(sblock_id << 8) + c];
-
- unsigned char type = m_cblock_mapping[2 * (c * n_cblocks + cblock_id)];
- unsigned char c_map = m_cblock_mapping[2 * (c * n_cblocks + cblock_id) + 1];
-
- long freq_cnt_bits = (m_cblock_header[cblock_id] & 255L);
- long rare_cnt_bits = ((m_cblock_header[cblock_id] >> 8) & 255L);
- long block_id = (i >> freq_cnt_bits);
-
- if (type == k_char_type_freq) {
- // Case 1 (fastest): symbol c was frequent in the context block.
- // Answer a query using frequent trunk.
- long block_rank = m_freq_trunk[(block_id << freq_cnt_bits) + c_map] >> 8;
- long extra = 0;
- for (long j = (block_id << freq_cnt_bits); j < i; ++j)
- if ((m_freq_trunk[j] & 255) == c_map) ++extra;
-
- return sblock_rank + block_rank + extra;
- } else if (type == k_char_type_rare) {
- // Case 2: symbol c was rare inside the context block.
- // Compute new_i.
- long rare_trunk_ptr = (m_cblock_header[cblock_id] >> 16);
- long new_i = m_freq_trunk[((block_id + 1) << freq_cnt_bits) - 1] >> 8;
- for (long j = (block_id << freq_cnt_bits); j < i; ++j)
- if ((m_freq_trunk[j] & 255) + 1 == (1U << freq_cnt_bits)) ++new_i;
-
- // Answer a query on rare trunk.
- long rare_block_id = (new_i >> rare_cnt_bits);
- long block_rank = m_rare_trunk[rare_trunk_ptr +
- (rare_block_id << rare_cnt_bits) + c_map] >> 8;
- long extra = 0;
- for (long j = (rare_block_id << rare_cnt_bits); j < new_i; ++j)
- if ((m_rare_trunk[rare_trunk_ptr + j] & 255) == c_map) ++extra;
-
- return sblock_rank + block_rank + extra;
- } else {
- // Case 3: symbol c does not occur in the context block.
- // Find the first cblock where c occurrs.
- while (cblock_id < n_cblocks && (cblock_id & k_cblocks_in_sblock_mask) &&
- m_cblock_mapping[2 * (c * n_cblocks + cblock_id)] == k_char_type_missing)
- ++cblock_id;
-
- if (cblock_id == n_cblocks) {
- // We reached the end of encoding, return count[c].
- return m_count[c];
- } else if (!(cblock_id & k_cblocks_in_sblock_mask)) {
- // We reached the boundary of superblock,
- // retreive the answer from superblock header.
- return m_sblock_header[256 * (cblock_id >> k_cblocks_in_sblock_log) + c];
- } else {
- // We found cblock where c occurrs, but it wasn't on the
- // sblock boundary. In the recursive call this will either
- // be case 1 or case 2.
- return rank(cblock_id << k_cblock_size_log, c);
- }
- }
- }
- }
-
- ~rank4n() {
- if (m_length) {
- free(m_sblock_header);
- free(m_cblock_header);
- free(m_cblock_header2);
- free(m_cblock_mapping);
- free(m_cblock_type);
- free(m_freq_trunk);
- free(m_rare_trunk);
- }
- free(m_count);
- }
-};
-
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned long rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblock_size = (1L << k_cblock_size_log);
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned long rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblock_size_mask = (1L << k_cblock_size_log) - 1;
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_2cblock_size = (2 << k_cblock_size_log);
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_2cblock_size_mask = (2 << k_cblock_size_log) - 1;
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sigma = (1 << k_sigma_log);
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sigma_mask = (1 << k_sigma_log) - 1;
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned long rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblock_size_mask_neg = ~((1L << k_cblock_size_log) - 1);
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblocks_in_sblock_log = k_sblock_size_log - k_cblock_size_log;
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblocks_in_sblock = (1 << (k_sblock_size_log - k_cblock_size_log));
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblocks_in_sblock_mask = (1 << (k_sblock_size_log - k_cblock_size_log)) - 1;
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sblock_size = (1 << k_sblock_size_log);
-
-template<typename saidx_t, unsigned pagesize_log, unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<saidx_t, pagesize_log, k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sblock_size_mask = (1 << k_sblock_size_log) - 1;
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_RANK_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/sparse_isa.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/sparse_isa.h
deleted file mode 100644
index 68b2076f..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/sparse_isa.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/sparse_isa.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * Sparse ISA encoding based on the ISAs algorithm computing
- * Lempel-Ziv (LZ77) factorization described in
- *
- * Dominik Kempa, Simon J. Puglisi:
- * Lempel-Ziv factorization: Simple, fast, practical.
- * In Proc. ALENEX 2013, p. 103-112.
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_SPARSE_ISA_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_SPARSE_ISA_H_INCLUDED
-
-#include <algorithm>
-#include <thread>
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-template<typename pagearray_type, typename rank_type, unsigned isa_sampling_rate_log>
-struct sparse_isa {
- static const unsigned isa_sampling_rate = (1U << isa_sampling_rate_log);
- static const unsigned isa_sampling_rate_mask = isa_sampling_rate - 1;
- static const long k_sigma = 256;
-
- static void compute_sparse_isa_aux(const pagearray_type &bwtsa, long block_beg,
- long block_end, long psa_size, long *sparse_isa, long &last) {
- for (long j = block_beg; j < block_end; ++j) {
- long sa_j = bwtsa[j].sa;
- if (!(sa_j & isa_sampling_rate_mask))
- sparse_isa[sa_j >> isa_sampling_rate_log] = j;
- if (sa_j == psa_size - 1) last = j;
- }
- }
-
- sparse_isa(const pagearray_type *bwtsa, const unsigned char *text,
- const rank_type *rank, long length, long i0, long max_threads) {
- m_bwtsa = bwtsa;
- m_length = length;
- m_rank = rank;
- m_text = text;
- m_i0 = i0;
-
- long elems = (m_length + isa_sampling_rate - 1) / isa_sampling_rate + 1;
- m_sparse_isa = (long *)malloc(elems * sizeof(long));
-
- long max_block_size = (m_length + max_threads - 1) / max_threads;
- long n_blocks = (m_length + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long t = 0; t < n_blocks; ++t) {
- long block_beg = t * max_block_size;
- long block_end = std::min(block_beg + max_block_size, m_length);
-
- threads[t] = new std::thread(compute_sparse_isa_aux, std::ref(*m_bwtsa),
- block_beg, block_end, m_length, m_sparse_isa, std::ref(m_last_isa));
- }
-
- for (long t = 0; t < n_blocks; ++t) threads[t]->join();
- for (long t = 0; t < n_blocks; ++t) delete threads[t];
- delete[] threads;
-
- m_count = (long *)malloc(k_sigma * sizeof(long));
- std::copy(rank->m_count, rank->m_count + k_sigma, m_count);
- ++m_count[text[length - 1]];
- --m_count[0];
-
- for (long i = 0, s = 0; i < k_sigma; ++i) {
- long t = m_count[i];
- m_count[i] = s;
- s += t;
- }
- }
-
- inline long query(long j) const {
- long isa_i;
- long i = ((j + isa_sampling_rate - 1) >> isa_sampling_rate_log);
- if ((i << isa_sampling_rate_log) < m_length) {
- isa_i = m_sparse_isa[i];
- i <<= isa_sampling_rate_log;
- } else {
- isa_i = m_last_isa;
- i = m_length - 1;
- }
-
- while (i != j) {
- // Compute ISA[i - 1] from ISA[i].
- // Invariant:
- // isa_i = ISA[i]
- // j <= i
- unsigned char c = m_text[i - 1];
- int delta = (isa_i > m_i0 && c == 0);
-
- isa_i = m_count[c] + m_rank->rank(isa_i, c) - delta;
- if (isa_i < 0 || ((long)((*m_bwtsa)[isa_i].sa)) != i - 1)
- ++isa_i;
-
- --i;
- }
-
- return isa_i;
- }
-
- ~sparse_isa() {
- free(m_sparse_isa);
- free(m_count);
- }
-
-
-private:
- long m_length;
- long m_last_isa;
- long m_i0;
-
- long *m_count;
- long *m_sparse_isa;
-
- const unsigned char *m_text;
- const pagearray_type *m_bwtsa;
- const rank_type *m_rank;
-};
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_SPARSE_ISA_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/srank_aux.h b/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/srank_aux.h
deleted file mode 100644
index 139ad357..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/inmem_psascan_src/srank_aux.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * @file src/psascan_src/inmem_psascan_src/srank_aux.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_INMEM_PSASCAN_SRC_SRANK_AUX_H_INCLUDED
-#define __PSASCAN_SRC_INMEM_PSASCAN_SRC_SRANK_AUX_H_INCLUDED
-
-
-namespace psascan_private {
-namespace inmem_psascan_private {
-
-//==============================================================================
-// Compute ms-decomposition of text[0..length) from ms-decomposition of
-// text[0..length - 1). The result is returned via updated values s, p, r.
-//==============================================================================
-template<typename T>
-inline void update_ms(const unsigned char *text, T length, T &s, T &p) {
- if (length == 1) { s = 0; p = 1; return; }
-
- T i = length - 1;
- while (i < length) {
- unsigned char a = text[i - p];
- unsigned char b = text[i];
-
- if (a > b) p = i - s + 1;
- else if (a < b) {
- long r = (i - s);
- while (r >= p) r -= p;
- i -= r;
- s = i;
- p = 1;
- }
-
- ++i;
- }
-}
-
-} // namespace inmem_psascan_private
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_INMEM_PSASCAN_SRC_SRANK_AUX_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/merge.h b/exttools/pSAscan-0.1.0/src/psascan_src/merge.h
deleted file mode 100644
index 6fdef5e8..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/merge.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/**
- * @file src/psascan_src/merge.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_MERGE_H_INCLUDED
-#define __PSASCAN_SRC_MERGE_H_INCLUDED
-
-#include <cstdio>
-#include <cmath>
-#include <string>
-#include <vector>
-#include <algorithm>
-
-#include "utils.h"
-#include "uint40.h"
-#include "distributed_file.h"
-#include "half_block_info.h"
-#include "async_stream_writer.h"
-#include "async_vbyte_stream_reader.h"
-
-
-namespace psascan_private {
-
-// Merge partial suffix arrays into final suffix array.
-template<typename block_offset_type>
-void merge(std::string output_filename, long ram_use, std::vector<half_block_info<block_offset_type> > &hblock_info) {
- long n_block = (long)hblock_info.size();
- long text_length = 0;
-
- std::sort(hblock_info.begin(), hblock_info.end());
- for (size_t j = 0; j < hblock_info.size(); ++j)
- text_length += hblock_info[j].end - hblock_info[j].beg;
-
- long pieces = (1 + sizeof(block_offset_type)) * n_block - 1 + sizeof(uint40);
- long buffer_size = (ram_use + pieces - 1) / pieces;
-
- fprintf(stderr, "\nMerge partial suffix arrays:\n");
- fprintf(stderr, " buffer size per block = %ld (%.2LfMiB)\n",
- sizeof(block_offset_type) * buffer_size,
- (1.L * sizeof(block_offset_type) * buffer_size) / (1 << 20));
- fprintf(stderr, " sizeof(output_type) = %ld\n", sizeof(uint40));
-
- typedef async_vbyte_stream_reader<long> vbyte_reader_type;
- typedef async_stream_writer<uint40> output_writer_type;
-
- output_writer_type *output = new output_writer_type(output_filename, sizeof(uint40) * buffer_size);
- vbyte_reader_type **gap = new vbyte_reader_type*[n_block - 1];
- for (long i = 0; i < n_block; ++i) {
- hblock_info[i].psa->initialize_reading(sizeof(block_offset_type) * buffer_size);
- if (i + 1 != n_block)
- gap[i] = new vbyte_reader_type(hblock_info[i].gap_filename, buffer_size);
- }
-
- long *gap_head = new long[n_block];
- for (long i = 0; i + 1 < n_block; ++i)
- gap_head[i] = gap[i]->read();
- gap_head[n_block - 1] = 0;
-
- long tmp = (long)sqrtl((long double)n_block);
- long sblock_size = 1L;
- long sblock_size_log = 0;
- while (sblock_size * 2L <= tmp) {
- sblock_size *= 2L;
- ++sblock_size_log;
- }
-
- long n_sblocks = (n_block + sblock_size - 1) / sblock_size;
- std::pair<long, long> *sblock_info = new std::pair<long, long>[n_sblocks];
-
- for (long i = 0; i < n_sblocks; ++i) {
- long sblock_beg = i * sblock_size;
- long sblock_end = std::min(n_block, sblock_beg + sblock_size);
-
- sblock_info[i].second = 0;
- sblock_info[i].first = gap_head[sblock_beg];
- for (long j = sblock_beg + 1; j < sblock_end; ++j)
- sblock_info[i].first = std::min(sblock_info[i].first, gap_head[j]);
- }
-
- long double merge_start = utils::wclock();
- for (long i = 0, dbg = 0; i < text_length; ++i, ++dbg) {
- if (dbg == (1 << 23)) {
- long double elapsed = utils::wclock() - merge_start;
- long inp_vol = (1L + sizeof(block_offset_type)) * i;
- long out_vol = sizeof(uint40) * i;
- long tot_vol = inp_vol + out_vol;
- long double tot_vol_m = tot_vol / (1024.L * 1024);
- long double io_speed = tot_vol_m / elapsed;
- fprintf(stderr, "\r %.1Lf%%. Time = %.2Lfs. I/O: %2.LfMiB/s",
- (100.L * i) / text_length, elapsed, io_speed);
- dbg = 0;
- }
-
- // Find the superblock containing gap head equal to zero.
- long k = 0;
- while (sblock_info[k].first != 0) {
- sblock_info[k].first--;
- sblock_info[k].second++;
- ++k;
- }
-
- // Find the block with the gap head equal to zero.
- long sblock_beg = (k << sblock_size_log);
- long sblock_end = std::min(n_block, sblock_beg + sblock_size);
-
- long new_min = text_length;
- long j = sblock_beg;
- while (gap_head[j] != sblock_info[k].second) {
- gap_head[j] -= (sblock_info[k].second + 1);
- new_min = std::min(new_min, gap_head[j]);
- ++j;
- }
-
- long SA_i = hblock_info[j].psa->read() + hblock_info[j].beg;
-
- if (j != n_block - 1) gap_head[j] = gap[j]->read();
- new_min = std::min(new_min, gap_head[j]);
- ++j;
-
- while (j < sblock_end) {
- gap_head[j] -= sblock_info[k].second;
- new_min = std::min(new_min, gap_head[j]);
- ++j;
- }
-
- sblock_info[k].first = new_min;
- sblock_info[k].second = 0;
-
- output->write(SA_i);
- }
- long double merge_time = utils::wclock() - merge_start;
- long io_volume = (1 + sizeof(block_offset_type) + sizeof(uint40)) * text_length;
- long double io_speed = (io_volume / (1024.L * 1024)) / merge_time;
- fprintf(stderr, "\r 100.0%%. Time: %.2Lfs. I/O: %.2LfMiB/s\n", merge_time, io_speed);
-
- // Clean up.
- delete output;
- for (long i = 0; i < n_block; ++i) {
- hblock_info[i].psa->finish_reading();
- delete hblock_info[i].psa;
- if (i + 1 != n_block)
- delete gap[i];
- }
-
- delete[] gap;
- delete[] gap_head;
- delete[] sblock_info;
-
- for (int i = 0; i + 1 < n_block; ++i)
- utils::file_delete(hblock_info[i].gap_filename);
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_MERGE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/multifile.h b/exttools/pSAscan-0.1.0/src/psascan_src/multifile.h
deleted file mode 100644
index d2a5bf72..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/multifile.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * @file src/psascan_src/multifile.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_MULTIFILE_H_INCLUDED
-#define __PSASCAN_SRC_MULTIFILE_H_INCLUDED
-
-#include <vector>
-#include <string>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-struct single_file_info {
- long m_beg;
- long m_end;
- std::string m_filename;
-
- single_file_info(long beg, long end, std::string filename) {
- m_beg = beg;
- m_end = end;
- m_filename = filename;
- }
-};
-
-struct multifile {
- std::vector<single_file_info> files_info;
-
- void add_file(long beg, long end, std::string filename) {
- files_info.push_back(single_file_info(beg, end, filename));
- }
-
- ~multifile() {
- for (size_t i = 0; i < files_info.size(); ++i)
- utils::file_delete(files_info[i].m_filename);
- }
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_MULTIFILE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/multifile_bit_stream_reader.h b/exttools/pSAscan-0.1.0/src/psascan_src/multifile_bit_stream_reader.h
deleted file mode 100644
index 743d67b4..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/multifile_bit_stream_reader.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/**
- * @file src/psascan_src/multifile_bit_stream_reader.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED
-#define __PSASCAN_SRC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED
-
-#include <cstdio>
-#include <vector>
-
-#include "utils.h"
-#include "multifile.h"
-
-
-namespace psascan_private {
-
-struct multifile_bit_stream_reader {
-private:
- static const long k_bufsize;
-
- // info for currently accessed file.
- std::FILE *m_file;
- long m_file_beg;
- long m_file_end;
-
- unsigned char *m_buffer;
- long m_offset; // number of the first bit in the buffer
- long m_filled; // how many bits we have in a buffer
-
- long cur_bit_absolute;
- long cur_bit_buffer;
- long cur_bit;
- long cur_byte;
-
- std::vector<single_file_info> files_info;
-
-public:
- multifile_bit_stream_reader(const multifile *m) {
- m_file = NULL;
- m_file_beg = 0;
- m_file_end = 0;
- m_buffer = new unsigned char[k_bufsize];
-
- if (m != NULL)
- files_info = m->files_info;
- }
-
- // Subsequent access operations are quaranteed
- // to be with increasing argument.
- bool access(long i) {
- if (i < m_file_beg || m_file_end <= i) {
- open_file_for_index(i);
- i -= m_file_beg;
- } else {
- i -= m_file_beg;
-
- if (i < m_offset || m_offset + m_filled <= i) {
- refill(i);
- }
- }
-
- i -= m_offset;
- return (m_buffer[i >> 3] & (1 << (i & 7)));
- }
-
- void initialize_sequential_reading(long i) {
- open_file_for_index(i);
-
- cur_bit_absolute = i;
- cur_bit_buffer = cur_bit_absolute - (m_file_beg + m_offset);
- cur_byte = (cur_bit_buffer >> 3);
- cur_bit = (cur_bit_buffer & 7);
- }
-
- inline bool read() {
- if (cur_bit_absolute == m_file_end) open_file_for_index(m_file_end);
- if (cur_bit_buffer == m_filled) refill(m_offset + m_filled);
-
- bool ans = (m_buffer[cur_byte] & (1 << cur_bit));
- ++cur_bit;
- if (cur_bit == 8) {
- cur_bit = 0;
- ++cur_byte;
- }
-
- ++cur_bit_buffer;
- ++cur_bit_absolute;
- return ans;
- }
-
- ~multifile_bit_stream_reader() {
- if (m_file)
- std::fclose(m_file);
- delete[] m_buffer;
- }
-
-private:
- void refill(long offset) {
- offset -= (offset & 7);
- if (m_offset + m_filled != offset)
- std::fseek(m_file, (offset >> 3), SEEK_SET);
- long bytes_read = std::fread(m_buffer, 1, k_bufsize, m_file);
- m_filled = std::min(m_file_end - offset, 8L * bytes_read); // in bits
- m_offset = offset; // in bits
-
- cur_byte = 0; // in the buffer
- cur_bit = 0; // in the current byte
- cur_bit_buffer = 0;
- }
-
- void open_file_for_index(long i) {
- // Close current file (if any is open).
- if (m_file) std::fclose(m_file);
-
- // First find the right file.
- long id = 0;
- while (i < files_info[id].m_beg || files_info[id].m_end <= i)
- ++id;
-
- m_file = utils::open_file(files_info[id].m_filename, "r");
- m_file_beg = files_info[id].m_beg;
- m_file_end = files_info[id].m_end;
-
- cur_bit_absolute = m_file_beg;
- cur_bit_buffer = 0;
- cur_bit = 0;
- cur_byte = 0;
-
- m_offset = 0;
- m_filled = 0;
-
- refill(i - m_file_beg);
- }
-};
-
-const long multifile_bit_stream_reader::k_bufsize = (1L << 20);
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_MULTIFILE_BIT_STREAM_READER_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/parallel_utils.h b/exttools/pSAscan-0.1.0/src/psascan_src/parallel_utils.h
deleted file mode 100644
index d0daa61f..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/parallel_utils.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/**
- * @file src/psascan_src/parallel_utils.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_PARALLEL_UTILS_H_INCLUDED
-#define __PSASCAN_SRC_PARALLEL_UTILS_H_INCLUDED
-
-#include <thread>
-#include <algorithm>
-
-
-namespace psascan_private {
-namespace parallel_utils {
-
-//==============================================================================
-// Encode tab[0..length) using vbyte encoding and write to dest sequentially.
-//==============================================================================
-void encode_vbyte_slab(const long *tab, long length, unsigned char *dest) {
- long ptr = 0L;
- for (long j = 0; j < length; ++j) {
- long x = tab[j];
- while (x > 127) {
- dest[ptr++] = ((x & 0x7f) | 0x80);
- x >>= 7;
- }
- dest[ptr++] = x;
- }
-}
-
-
-//==============================================================================
-// Compute the size of vbyte encoding of tab[0..length).
-//==============================================================================
-void compute_size_of_vbyte_slab(const long *tab, long length, long &result) {
- result = 0L;
- for (long j = 0; j < length; ++j) {
- long x = tab[j];
- while (x > 127) {
- ++result;
- x >>= 7;
- }
- ++result;
- }
-}
-
-
-//==============================================================================
-// Encode tab[0..length) using v-byte encoding and write to dest in parallel.
-// We assume that dest is sufficiently large to hold the output.
-// The function returns the length of the slab.
-//==============================================================================
-long convert_array_to_vbyte_slab(const long *tab, long length, unsigned char *dest, long max_threads) {
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_blocks = (length + max_block_size - 1) / max_block_size;
-
- // 1
- //
- // Compute the length of slab for each block.
- long *block_slab_length = new long[n_blocks];
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long t = 0; t < n_blocks; ++t) {
- long block_beg = t * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[t] = new std::thread(compute_size_of_vbyte_slab,
- tab + block_beg, block_size, std::ref(block_slab_length[t]));
- }
-
- for (long t = 0; t < n_blocks; ++t) threads[t]->join();
- for (long t = 0; t < n_blocks; ++t) delete threads[t];
-
- // 2
- //
- // Compute cummulative sum for block slab lengths.
- long total_slab_length = 0L;
- for (long j = 0; j < n_blocks; ++j) {
- long temp = block_slab_length[j];
- block_slab_length[j] = total_slab_length;
- total_slab_length += temp;
- }
-
- // 3
- //
- // Compute the slabs. Now we know where each slab begins.
- for (long t = 0; t < n_blocks; ++t) {
- long block_beg = t * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[t] = new std::thread(encode_vbyte_slab,
- tab + block_beg, block_size, dest + block_slab_length[t]);
- }
-
- for (long t = 0; t < n_blocks; ++t) threads[t]->join();
- for (long t = 0; t < n_blocks; ++t) delete threads[t];
- delete[] threads;
- delete[] block_slab_length;
-
- return total_slab_length;
-}
-
-
-//==============================================================================
-// Copy src[0..length) to dest[0..length).
-//==============================================================================
-template<typename T, typename S>
-void parallel_copy_aux(const T *src, S *dest, long length) {
- for (long i = 0; i < length; ++i)
- dest[i] = (S)src[i];
-}
-
-
-//==============================================================================
-// Parallel version of std::copy (with slightly different interface).
-// Conversion from T to S has to make sense.
-//==============================================================================
-template<typename T, typename S>
-void parallel_copy(const T *src, S *dest, long length, long max_threads) {
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_blocks = (length + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(parallel_copy_aux<T, S>,
- src + block_beg, dest + block_beg, block_size);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-}
-
-
-//==============================================================================
-// Set all values in tab[0..length) to x.
-//==============================================================================
-template<typename T>
-void parallel_fill_aux(T *tab, long length, T x) {
- for (long i = 0; i < length; ++i)
- tab[i] = x;
-}
-
-
-//==============================================================================
-// Parallel version of std::fill (with slightly different interface).
-//==============================================================================
-template<typename T>
-void parallel_fill(T *tab, long length, T x, long max_threads) {
- long max_block_size = (length + max_threads - 1) / max_threads;
- long n_blocks = (length + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long i = 0; i < n_blocks; ++i) {
- long block_beg = i * max_block_size;
- long block_end = std::min(block_beg + max_block_size, length);
- long block_size = block_end - block_beg;
-
- threads[i] = new std::thread(parallel_fill_aux<T>,
- tab + block_beg, block_size, x);
- }
-
- for (long i = 0; i < n_blocks; ++i) threads[i]->join();
- for (long i = 0; i < n_blocks; ++i) delete threads[i];
- delete[] threads;
-}
-
-} // namespace parallel_utils
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_PARALLEL_UTILS_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/partial_sufsort.h b/exttools/pSAscan-0.1.0/src/psascan_src/partial_sufsort.h
deleted file mode 100644
index 50a49cdd..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/partial_sufsort.h
+++ /dev/null
@@ -1,590 +0,0 @@
-/**
- * @file src/psascan_src/partial_sufsort.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_PARTIAL_SUFSORT_H_INCLUDED
-#define __PSASCAN_SRC_PARTIAL_SUFSORT_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <string>
-#include <thread>
-#include <algorithm>
-#include <vector>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-#include "inmem_psascan_src/inmem_psascan.h"
-#include "utils.h"
-#include "rank.h"
-#include "gap_array.h"
-#include "bitvector.h"
-#include "multifile.h"
-#include "distributed_file.h"
-#include "half_block_info.h"
-#include "bwt_merge.h"
-#include "compute_gap.h"
-#include "em_compute_initial_ranks.h"
-#include "compute_right_gap.h"
-#include "compute_left_gap.h"
-
-
-namespace psascan_private {
-
-//=============================================================================
-// The main function processing the block.
-//=============================================================================
-template<typename block_offset_type>
-void process_block(long block_beg, long block_end, long text_length, long ram_use,
- long max_threads, long gap_buf_size, std::string text_filename,
- std::string output_filename, std::string gap_filename,
- multifile *newtail_gt_begin_rev, const multifile *tail_gt_begin_rev,
- std::vector<half_block_info<block_offset_type> > &hblock_info, bool verbose) {
- long block_size = block_end - block_beg;
-
- if (block_end != text_length && block_size <= 1) {
- fprintf(stderr, "Error: any block other than the last one has to be of length at least two.\n");
- std::exit(EXIT_FAILURE);
- }
-
- long block_tail_beg = block_end;
- long block_tail_end = text_length;
-
- bool last_block = (block_end == text_length);
- bool first_block = (block_beg == 0);
-
- long left_block_size;
- if (!last_block) left_block_size = std::max(1L, block_size / 2L);
- else left_block_size = std::min(block_size, std::max(1L, ram_use / 10L));
- long right_block_size = block_size - left_block_size;
- long left_block_beg = block_beg;
- long left_block_end = block_beg + left_block_size;
- long right_block_beg = left_block_end;
- long right_block_end = block_end;
- // Invariant; left_block_size > 0.
-
- fprintf(stderr, " Block size = %ld (%.2LfMiB)\n", block_size, 1.L * block_size / (1 << 20));
- fprintf(stderr, " Left half-block size = %ld (%.2LfMiB)\n", left_block_size, 1.L * left_block_size / (1 << 20));
- fprintf(stderr, " Right half-block size = %ld (%.2LfMiB)\n", right_block_size, 1.L * right_block_size / (1 << 20));
-
- std::vector<long> block_initial_ranks;
- unsigned char block_last_symbol = 0;
-
- long right_block_i0 = 0;
- long left_block_i0 = 0;
-
- std::string right_block_pbwt_fname = output_filename + "." + utils::random_string_hash();
- std::string right_block_gt_begin_rev_fname = output_filename + "." + utils::random_string_hash();
-
- half_block_info<block_offset_type> info_left;
- half_block_info<block_offset_type> info_right;
-
- info_left.beg = left_block_beg;
- info_left.end = left_block_end;
- if (right_block_size > 0) {
- info_right.beg = right_block_beg;
- info_right.end = right_block_end;
- }
-
- //----------------------------------------------------------------------------
- // STEP 1: Process right half-block.
- //----------------------------------------------------------------------------
- multifile *right_block_gt_begin_rev = NULL;
- unsigned char *right_block = NULL;
-
- if (right_block_size > 0) {
- fprintf(stderr, " Process right half-block:\n");
-
- // 1.a
- //
- // Read the right half-block from disk.
- fprintf(stderr, " Read: ");
- right_block = (unsigned char *)malloc(right_block_size);
- long double right_block_read_start = utils::wclock();
- utils::read_block(text_filename, right_block_beg, right_block_size, right_block);
- block_last_symbol = right_block[right_block_size - 1];
- long double right_block_read_time = utils::wclock() - right_block_read_start;
- long double right_block_read_io = (right_block_size / (1024.L * 1024)) / right_block_read_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_block_read_time, right_block_read_io);
-
- // 1.b
- //
- // Compute partial SA, BWT and gt_begin of the right half-block.
-
- // Allocate SA, BWT and gt_begin.
- unsigned char *right_block_sabwt = (unsigned char *)malloc(right_block_size * (sizeof(block_offset_type) + 1));
- block_offset_type *right_block_psa_ptr = (block_offset_type *)right_block_sabwt;
- unsigned char *right_block_bwt = (unsigned char *)(right_block_psa_ptr + right_block_size);
- bitvector *right_block_gt_begin_rev_bv = new bitvector(right_block_size);
-
- // Start the timer.
- fprintf(stderr, " Internal memory sufsort: ");
- if (verbose) fprintf(stderr, "\n%s\n", std::string(60, '*').c_str());
- long double right_block_sascan_start = utils::wclock();
-
- // Close stderr.
- int stderr_backup = 0;
- if (!verbose) {
- std::fflush(stderr);
- stderr_backup = dup(2);
- int stderr_temp = open("/dev/null", O_WRONLY);
- dup2(stderr_temp, 2);
- close(stderr_temp);
- }
-
- // Run in-memory pSAscan.
- inmem_psascan_private::inmem_psascan<block_offset_type>(right_block, right_block_size, right_block_sabwt,
- max_threads, !last_block, true, right_block_gt_begin_rev_bv, -1, right_block_beg, right_block_end,
- text_length, text_filename, tail_gt_begin_rev, &right_block_i0);
-
- // Restore stderr.
- if (!verbose) {
- std::fflush(stderr);
- dup2(stderr_backup, 2);
- close(stderr_backup);
- }
-
- // Print summary.
- long double right_block_sascan_time = utils::wclock() - right_block_sascan_start;
- long double right_block_sascan_speed = (right_block_size / (1024.L * 1024)) / right_block_sascan_time;
- if (verbose) fprintf(stderr, "%s\n", std::string(60, '*').c_str());
- fprintf(stderr, "%.2Lfs. Speed: %.2LfMiB/s\n", right_block_sascan_time, right_block_sascan_speed);
-
- // 1.c
- //
- // Compute the first term of initial ranks for the block.
- if (!last_block) {
- fprintf(stderr, " Compute initial tail ranks (part 1): ");
- long double initial_ranks_first_term_start = utils::wclock();
- em_compute_initial_ranks<block_offset_type>(right_block, right_block_psa_ptr, right_block_bwt,
- right_block_i0, right_block_beg, right_block_end, text_length, text_filename,
- tail_gt_begin_rev, block_initial_ranks, max_threads, block_tail_end, 0); // Note the space usage!
-
- size_t vec_size = block_initial_ranks.size();
- for (size_t j = 0; j + 1 < vec_size; ++j)
- block_initial_ranks[j] = block_initial_ranks[j + 1];
- block_initial_ranks[vec_size - 1] = 0;
-
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - initial_ranks_first_term_start);
- }
-
- // 1.d
- //
- // Write the partial SA of the right half-block to disk.
- fprintf(stderr, " Write partial SA to disk: ");
- long double right_psa_save_start = utils::wclock();
- long right_psa_max_part_length = std::max((long)sizeof(block_offset_type), ram_use / 20L);
- info_right.psa = new distributed_file<block_offset_type>(output_filename,
- right_psa_max_part_length, right_block_psa_ptr, right_block_psa_ptr + right_block_size);
- long double right_psa_save_time = utils::wclock() - right_psa_save_start;
- long double right_psa_save_io = ((right_block_size * sizeof(block_offset_type)) / (1024.L * 1024)) / right_psa_save_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_psa_save_time, right_psa_save_io);
-
- // 1.e
- //
- // Write the BWT of the right half-block on disk.
- if (!last_block) {
- fprintf(stderr, " Write BWT to disk: ");
- long double right_bwt_save_start = utils::wclock();
- utils::write_objects_to_file(right_block_bwt, right_block_size, right_block_pbwt_fname);
- long double right_bwt_save_time = utils::wclock() - right_bwt_save_start;
- long double right_bwt_save_io = (right_block_size / (1024.L * 1024)) / right_bwt_save_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_bwt_save_time, right_bwt_save_io);
- }
- free(right_block_sabwt);
-
- // 1.f
- //
- // Write reversed gt_begin of the right half-block to disk.
- fprintf(stderr, " Write gt_begin to disk: ");
- long double right_gt_begin_rev_save_start = utils::wclock();
- right_block_gt_begin_rev_bv->save(right_block_gt_begin_rev_fname);
- right_block_gt_begin_rev = new multifile();
- right_block_gt_begin_rev->add_file(text_length - right_block_end, text_length - right_block_beg,
- right_block_gt_begin_rev_fname);
- delete right_block_gt_begin_rev_bv;
- long double right_gt_begin_rev_save_time = utils::wclock() - right_gt_begin_rev_save_start;
- long double right_gt_begin_rev_save_io = (right_block_size / (8.L * (1 << 20))) / right_gt_begin_rev_save_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_gt_begin_rev_save_time, right_gt_begin_rev_save_io);
- }
-
-
- //----------------------------------------------------------------------------
- // STEP 2: Process left half-block.
- //----------------------------------------------------------------------------
- fprintf(stderr, " Process left half-block:\n");
-
- // 2.a
- //
- // Read the left half-block from disk.
- fprintf(stderr, " Read: ");
- long double left_block_read_start = utils::wclock();
- unsigned char *left_block = (unsigned char *)malloc(left_block_size);
- utils::read_block(text_filename, left_block_beg, left_block_size, left_block);
- unsigned char left_block_last = left_block[left_block_size - 1];
- long double left_block_read_time = utils::wclock() - left_block_read_start;
- long double left_block_read_io = (left_block_size / (1024.L * 1024)) / left_block_read_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_block_read_time, left_block_read_io);
-
- // 2.b
- //
- // Compute partial SA, BWT and gt_begin for left half-block.
-
- // Allocate SA, BWT and gt_begin.
- unsigned char *left_block_sabwt = (unsigned char *)malloc(left_block_size * (sizeof(block_offset_type) + 1) + 1);
- block_offset_type *left_block_psa_ptr = (block_offset_type *)left_block_sabwt;
- unsigned char *left_block_bwt_ptr = (unsigned char *)(left_block_psa_ptr + left_block_size);
- bitvector *left_block_gt_begin_rev_bv = NULL;
- if (!first_block) left_block_gt_begin_rev_bv = new bitvector(left_block_size);
-
- // Start the timer.
- fprintf(stderr, " Internal memory sufsort: ");
- if (verbose) fprintf(stderr, "\n%s\n", std::string(60, '*').c_str());
- long double left_block_sascan_start = utils::wclock();
-
- // Close stderr.
- int stderr_backup = 0;
- if (!verbose) {
- std::fflush(stderr);
- stderr_backup = dup(2);
- int stderr_temp = open("/dev/null", O_WRONLY);
- dup2(stderr_temp, 2);
- close(stderr_temp);
- }
-
- // Run in-memory pSAscan.
- inmem_psascan_private::inmem_psascan<block_offset_type>(left_block, left_block_size, left_block_sabwt,
- max_threads, (right_block_size > 0), !first_block, left_block_gt_begin_rev_bv, -1, left_block_beg,
- left_block_end, text_length, text_filename, right_block_gt_begin_rev, &left_block_i0, right_block);
-
- // Restore stderr.
- if (!verbose) {
- std::fflush(stderr);
- dup2(stderr_backup, 2);
- close(stderr_backup);
- }
-
- // Print summary.
- long double left_block_sascan_time = utils::wclock() - left_block_sascan_start;
- long double left_block_sascan_speed = (left_block_size / (1024.L * 1024)) / left_block_sascan_time;
- if (verbose) fprintf(stderr, "%s\n", std::string(60, '*').c_str());
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", left_block_sascan_time, left_block_sascan_speed);
-
- // 2.c
- //
- // Compute the second terms of block initial ranks.
- long after_block_initial_rank = 0;
- if (!last_block) {
- fprintf(stderr, " Compute initial tail ranks (part 2): ");
- long double initial_ranks_second_term_start = utils::wclock();
- std::vector<long> block_initial_ranks_second_term;
- em_compute_initial_ranks<block_offset_type>(left_block, left_block_psa_ptr, left_block_beg,
- left_block_end, text_length, text_filename, tail_gt_begin_rev, block_initial_ranks_second_term,
- max_threads, block_tail_beg); // Note the space usage!
-
- after_block_initial_rank = block_initial_ranks_second_term[0];
- size_t vec_size = block_initial_ranks_second_term.size();
- for (size_t j = 0; j + 1 < vec_size; ++j)
- block_initial_ranks_second_term[j] = block_initial_ranks_second_term[j + 1];
- block_initial_ranks_second_term[vec_size - 1] = 0;
-
- for (size_t j = 0; j < vec_size; ++j)
- block_initial_ranks[j] += block_initial_ranks_second_term[j];
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - initial_ranks_second_term_start);
- }
-
- // 2.d
- //
- // Write the partial SA of the left half-block to disk.
- fprintf(stderr, " Write partial SA to disk: ");
- long double left_psa_save_start = utils::wclock();
- long left_psa_max_part_length = std::max((long)sizeof(block_offset_type), ram_use / 20L);
- info_left.psa = new distributed_file<block_offset_type>(output_filename,
- left_psa_max_part_length, left_block_psa_ptr, left_block_psa_ptr + left_block_size);
- long double left_psa_save_time = utils::wclock() - left_psa_save_start;
- long double left_psa_save_io = ((left_block_size * sizeof(block_offset_type)) / (1024.L * 1024)) / left_psa_save_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_psa_save_time, left_psa_save_io);
-
- // 2.e
- //
- // Copy the BWT of the left half-block to separate array.
- unsigned char *left_block_bwt = NULL;
- if (right_block_size > 0) {
- fprintf(stderr, " Copy BWT of left half-block to separate array: ");
- long double left_bwt_copy_start = utils::wclock();
- left_block_bwt = (unsigned char *)malloc(left_block_size);
- std::copy(left_block_bwt_ptr, left_block_bwt_ptr + left_block_size, left_block_bwt);
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - left_bwt_copy_start);
- }
-
- // 2.f
- //
- // Write gt_begin of the left half-block to disk.
- if (!first_block) {
- fprintf(stderr, " Write gt_begin to disk: ");
- long double left_gt_begin_rev_save_start = utils::wclock();
- std::string left_block_gt_begin_rev_fname = output_filename + "." + utils::random_string_hash();
- left_block_gt_begin_rev_bv->save(left_block_gt_begin_rev_fname);
- newtail_gt_begin_rev->add_file(text_length - left_block_end, text_length - left_block_beg, left_block_gt_begin_rev_fname);
- delete left_block_gt_begin_rev_bv;
- long double left_gt_begin_rev_save_time = utils::wclock() - left_gt_begin_rev_save_start;
- long double left_gt_begin_rev_save_io = (left_block_size / (8.L * (1 << 20))) / left_gt_begin_rev_save_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_gt_begin_rev_save_time, left_gt_begin_rev_save_io);
- }
-
- if (right_block_size == 0) {
- hblock_info.push_back(info_left);
- free(left_block);
- free(left_block_sabwt);
- return;
- }
-
- //----------------------------------------------------------------------------
- // STEP 3: Compute the gap array of the left half-block wrt to the
- // right half-block.
- //----------------------------------------------------------------------------
- fprintf(stderr, " Compute partial gap array for left half-block:\n");
- buffered_gap_array *left_block_gap = NULL;
-
- // 3.a
- //
- // Compute initial ranks for streaming of the right half-block.
- fprintf(stderr, " Compute initial ranks: ");
- long double initial_ranks_right_half_block_start = utils::wclock();
- std::vector<long> initial_ranks2;
- em_compute_initial_ranks<block_offset_type>(left_block, left_block_psa_ptr, left_block_bwt,
- left_block_i0, left_block_beg, left_block_end, text_length, text_filename, right_block_gt_begin_rev,
- initial_ranks2, max_threads, right_block_end, after_block_initial_rank); // Note the space usage!
-
- size_t vec_size = initial_ranks2.size();
- for (size_t j = 0; j + 1 < vec_size; ++j)
- initial_ranks2[j] = initial_ranks2[j + 1];
- initial_ranks2[vec_size - 1] = after_block_initial_rank;
-
- fprintf(stderr, "%.2Lfs\n", utils::wclock() - initial_ranks_right_half_block_start);
- free(left_block);
- free(left_block_sabwt);
-
- // 3.b
- //
- // Build the rank over BWT of left half-block.
- fprintf(stderr, " Construct rank: ");
- long double left_block_rank_build_start = utils::wclock();
- rank4n<> *left_block_rank = new rank4n<>(left_block_bwt, left_block_size, max_threads);
- long double left_block_rank_build_time = utils::wclock() - left_block_rank_build_start;
- long double left_block_rank_build_speed = (left_block_size / (1024.L * 1024)) / left_block_rank_build_time;
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", left_block_rank_build_time, left_block_rank_build_speed);
-
- // 3.c
- //
- // Compute gap array of the left half-block wrt to the right half-block.
- left_block_gap = new buffered_gap_array(left_block_size + 1, gap_filename);
- compute_gap<block_offset_type>(left_block_rank, left_block_gap, right_block_beg, right_block_end,
- text_length, max_threads, left_block_i0, gap_buf_size, left_block_last,
- initial_ranks2, text_filename, output_filename, right_block_gt_begin_rev, newtail_gt_begin_rev);
- delete left_block_rank;
- delete right_block_gt_begin_rev;
-
- if (last_block) {
- free(left_block_bwt);
-
- info_left.gap_filename = gap_filename + ".gap." + utils::random_string_hash();
- left_block_gap->save_to_file(info_left.gap_filename);
- left_block_gap->erase_disk_excess();
- delete left_block_gap;
-
- hblock_info.push_back(info_left);
- hblock_info.push_back(info_right);
- return;
- }
-
- //----------------------------------------------------------------------------
- // STEP 4: Compute the BWT for the block.
- //----------------------------------------------------------------------------
- fprintf(stderr, " Compute block gap array:\n");
-
- // 4.a
- //
- // Convert the partial gap of the left half-block into bitvector.
- fprintf(stderr, " Convert partial gap array of left half-block to bitvector: ");
- long double convert_to_bitvector_start = utils::wclock();
- bitvector *left_block_gap_bv = left_block_gap->convert_to_bitvector(max_threads);
- long double convert_to_bitvector_time = utils::wclock() - convert_to_bitvector_start;
- long double convert_to_bitvector_speed = (block_size / (1024.L * 1024)) / convert_to_bitvector_time;
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", convert_to_bitvector_time, convert_to_bitvector_speed);
-
- left_block_gap->erase_disk_excess();
- delete left_block_gap;
-
- // 4.b
- //
- // Read the BWT of the right half-block into RAM.
- fprintf(stderr, " Read BWT of right half-block: ");
- long double right_block_bwt_read_start = utils::wclock();
- unsigned char *right_block_bwt = NULL;
- utils::read_objects_from_file(right_block_bwt, right_block_size, right_block_pbwt_fname);
- long double right_block_bwt_read_time = utils::wclock() - right_block_bwt_read_start;
- long double right_block_bwt_read_io = (right_block_size / (1024.L * 1024)) / right_block_bwt_read_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", right_block_bwt_read_time, right_block_bwt_read_io);
-
- utils::file_delete(right_block_pbwt_fname);
-
- unsigned char *block_pbwt = (unsigned char *)malloc(block_size);
- long block_i0 = 0;
-
- // 4.c
- //
- // Merge BWTs of left and right half-block.
- fprintf(stderr, " Merge BWTs of half-blocks: ");
- long double bwt_merge_start = utils::wclock();
- block_i0 = merge_bwt(left_block_bwt, right_block_bwt, left_block_size, right_block_size,
- left_block_i0, right_block_i0, left_block_last, block_pbwt, left_block_gap_bv, max_threads);
- long double bwt_merge_time = utils::wclock() - bwt_merge_start;
- long double bwt_merge_speed = (block_size / (1024.L * 1024)) / bwt_merge_time;
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", bwt_merge_time, bwt_merge_speed);
-
- free(left_block_bwt);
- free(right_block_bwt);
-
- // 4.d
- //
- // Write left_block_gap_bv to disk.
- fprintf(stderr, " Write left half-block gap bitvector to disk: ");
- long double write_left_gap_bv_start = utils::wclock();
- std::string left_block_gap_bv_filename = gap_filename + ".left_block_gap_bv";
- left_block_gap_bv->save(left_block_gap_bv_filename);
- delete left_block_gap_bv;
- long double write_left_gap_bv_time = utils::wclock() - write_left_gap_bv_start;
- long double write_left_gap_bv_io = ((block_size / 8.L) / (1 << 20)) / write_left_gap_bv_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", write_left_gap_bv_time, write_left_gap_bv_io);
-
- //----------------------------------------------------------------------------
- // STEP 5: Compute the gap array of the block.
- //----------------------------------------------------------------------------
-
- // 5.a
- //
- // Construct the rank data structure over BWT of the block.
- fprintf(stderr, " Construct rank: ");
- long double whole_block_rank_build_start = utils::wclock();
- rank4n<> *block_rank = new rank4n<>(block_pbwt, block_size, max_threads);
- free(block_pbwt);
- long double whole_block_rank_build_time = utils::wclock() - whole_block_rank_build_start;
- long double whole_block_rank_build_io = (block_size / (1024.L * 1024)) / whole_block_rank_build_time;
- fprintf(stderr, "%.2Lfs (%.2LfMiB/s)\n", whole_block_rank_build_time, whole_block_rank_build_io);
-
- buffered_gap_array *block_gap = new buffered_gap_array(block_size + 1, gap_filename);
-
- // 5.b
- //
- // Compute gap for the block. During this step we also compute gt_begin
- // for the new tail.
- compute_gap<block_offset_type>(block_rank, block_gap, block_tail_beg, block_tail_end, text_length,
- max_threads, block_i0, gap_buf_size, block_last_symbol, block_initial_ranks, text_filename,
- output_filename, tail_gt_begin_rev, newtail_gt_begin_rev);
- delete block_rank;
-
- block_gap->flush_excess_to_disk();
-
- // 5.c
- //
- // Read left_block_gap_bv from disk.
- fprintf(stderr, " Read left half-block gap bitvector from disk: ");
- long double left_block_gap_bv_read_start = utils::wclock();
- left_block_gap_bv = new bitvector(left_block_gap_bv_filename);
- long double left_block_gap_bv_read_time = utils::wclock() - left_block_gap_bv_read_start;
- long double left_block_gap_bv_read_io = ((block_size / 8.L) / (1 << 20)) / left_block_gap_bv_read_time;
- fprintf(stderr, "%.2Lfs (I/O: %.2LfMiB/s)\n", left_block_gap_bv_read_time, left_block_gap_bv_read_io);
- utils::file_delete(left_block_gap_bv_filename);
-
- //----------------------------------------------------------------------------
- // STEP 6: Compute gap arrays of half-blocks.
- //----------------------------------------------------------------------------
- info_left.gap_filename = gap_filename + ".gap." + utils::random_string_hash();
- info_right.gap_filename = gap_filename + ".gap." + utils::random_string_hash();
-
- gap_array_2n *block_gap_2n = new gap_array_2n(block_gap, max_threads);
- delete block_gap;
- block_gap_2n->apply_excess_from_disk(std::max((1L << 20), block_size), max_threads);
-
- long ram_budget = std::max(1L << 20, (long)(0.875L * block_size));
- compute_right_gap(left_block_size, right_block_size, block_gap_2n, left_block_gap_bv, info_right.gap_filename, max_threads, ram_budget);
- compute_left_gap(left_block_size, right_block_size, block_gap_2n, left_block_gap_bv, info_left.gap_filename, max_threads, ram_budget);
-
- block_gap_2n->erase_disk_excess();
-
- delete block_gap_2n;
- delete left_block_gap_bv;
-
- hblock_info.push_back(info_left);
- hblock_info.push_back(info_right);
-}
-
-
-//=============================================================================
-// Compute partial SAs and gap arrays and write to disk.
-// Return the array of handlers to distributed files as a result.
-//=============================================================================
-template<typename block_offset_type>
-std::vector<half_block_info<block_offset_type> > partial_sufsort(std::string text_filename, std::string output_filename,
- std::string gap_filename, long text_length, long max_block_size, long ram_use, long max_threads, long gap_buf_size,
- bool verbose) {
- fprintf(stderr, "sizeof(block_offset_type) = %lu\n\n", sizeof(block_offset_type));
-
- long n_blocks = (text_length + max_block_size - 1) / max_block_size;
- multifile *tail_gt_begin_reversed = NULL;
-
- std::vector<half_block_info<block_offset_type> > hblock_info;
- for (long block_id = n_blocks - 1; block_id >= 0; --block_id) {
- long block_beg = max_block_size * block_id;
- long block_end = std::min(block_beg + max_block_size, text_length);
- fprintf(stderr, "Process block %ld/%ld [%ld..%ld):\n", n_blocks - block_id, n_blocks, block_beg, block_end);
-
- multifile *newtail_gt_begin_reversed = new multifile();
- process_block<block_offset_type>(block_beg, block_end, text_length, ram_use, max_threads, gap_buf_size,
- text_filename, output_filename, gap_filename, newtail_gt_begin_reversed, tail_gt_begin_reversed,
- hblock_info, verbose);
-
- delete tail_gt_begin_reversed;
- tail_gt_begin_reversed = newtail_gt_begin_reversed;
- }
-
- delete tail_gt_begin_reversed;
- return hblock_info;
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_PARTIAL_SUFSORT_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/psascan.h b/exttools/pSAscan-0.1.0/src/psascan_src/psascan.h
deleted file mode 100644
index 5fe9b87b..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/psascan.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * @file src/psascan_src/psascan.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_PSASCAN_H_INCLUDED
-#define __PSASCAN_SRC_PSASCAN_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <sys/resource.h>
-
-#include "partial_sufsort.h"
-#include "merge.h"
-#include "utils.h"
-#include "uint40.h"
-#include "half_block_info.h"
-
-
-namespace psascan_private {
-
-void pSAscan(std::string input_filename, std::string output_filename,
- std::string gap_filename, long ram_use, long max_threads,
- bool verbose, long gap_buf_size = (1L << 21)) {
- long n_gap_buffers = 2 * max_threads;
- if (ram_use < 6L) {
- fprintf(stderr, "Error: not enough memory to run pSAscan.\n");
- std::exit(EXIT_FAILURE);
- }
-
- // Turn paths absolute.
- input_filename = utils::absolute_path(input_filename);
- output_filename = utils::absolute_path(output_filename);
- gap_filename = utils::absolute_path(gap_filename);
- long length = utils::file_size(input_filename);
- fprintf(stderr, "Input filename = %s\n", input_filename.c_str());
- fprintf(stderr, "Output filename = %s\n", output_filename.c_str());
- fprintf(stderr, "Gap filename = %s\n", gap_filename.c_str());
- fprintf(stderr, "Input length = %ld (%.1LfMiB)\n", length, 1.L * length / (1L << 20));
- fprintf(stderr, "\n");
-
- long ram_for_threads = n_gap_buffers * gap_buf_size; // for buffers
- if (ram_use / 5.2L < (long double)(1L << 31)) // for oracle
- ram_for_threads += max_threads * gap_buf_size;
- else ram_for_threads += ((4.L / 5) * max_threads) * gap_buf_size;
- ram_for_threads += max_threads * gap_buf_size; // for temp
- ram_for_threads += max_threads * (6L << 20); // for reader/writer buffers
-
- long ram_use_excluding_threads = ram_use - ram_for_threads;
- if (ram_use_excluding_threads < 6L) {
- long required_MiB = (ram_for_threads + (1L << 20) - 1) / (1L << 20);
- fprintf(stderr, "Error: not enough memory to start threads. You need "
- "at least %ldMiB\n", required_MiB + 1);
- std::exit(EXIT_FAILURE);
- }
-
- fprintf(stderr, "RAM budget = %ld (%.1LfMiB)\n", ram_use, 1.L * ram_use / (1L << 20));
- fprintf(stderr, "RAM budget (excluding threads) = %ld (%.1LfMiB)\n",
- ram_use_excluding_threads, 1.L * ram_use_excluding_threads / (1L << 20));
- long max_block_size = std::max(2L, (long)(ram_use_excluding_threads / 5.2L));
-
- fprintf(stderr, "Max block size = %ld (%.1LfMiB)\n\n", max_block_size, 1.L * max_block_size / (1L << 20));
- fprintf(stderr, "Parallel settings:\n");
- fprintf(stderr, " #streaming threads = %ld\n", max_threads);
- fprintf(stderr, " #gap buffers = %ld\n", n_gap_buffers);
- fprintf(stderr, " gap buffer size = %ld\n\n", gap_buf_size);
-
- // Check if the maximum number of open files
- // is large enough for the merging to work.
- long n_half_blocks_estimated = 2L * (length / max_block_size + 1);
- long merge_max_open_files_estimated = 2L * n_half_blocks_estimated;
- long stream_max_open_files_estimated = 3L * max_threads + 1;
- long max_open_files_estimated = std::max(merge_max_open_files_estimated, stream_max_open_files_estimated);
- rlimit rlimit_res;
- if (!getrlimit(RLIMIT_NOFILE, &rlimit_res) &&
- (long)rlimit_res.rlim_cur < max_open_files_estimated) {
- fprintf(stderr,
-"\nError: the limit on the maximum number of open files is too small\n"
-"(current limit = %ld, required limit = %ld). See the README for\n"
-"more information.\n",
- (long)rlimit_res.rlim_cur, max_open_files_estimated);
- std::exit(EXIT_FAILURE);
- }
-
- long double start = utils::wclock();
- if (max_block_size < (1L << 31)) {
- std::vector<half_block_info<int> > hblock_info = partial_sufsort<int>(input_filename,
- output_filename, gap_filename, length, max_block_size, ram_use, max_threads, gap_buf_size, verbose);
- merge<int>(output_filename, ram_use, hblock_info);
- } else {
- std::vector<half_block_info<uint40> > hblock_info = partial_sufsort<uint40>(input_filename,
- output_filename, gap_filename, length, max_block_size, ram_use, max_threads, gap_buf_size, verbose);
- merge<uint40>(output_filename, ram_use, hblock_info);
- }
- long double total_time = utils::wclock() - start;
-
- fprintf(stderr, "\n\nComputation finished. Summary:\n");
- fprintf(stderr, " elapsed time: %.2Lfs (%.4Lfs/MiB)\n", total_time, total_time / ((1.L * length) / (1L << 20)));
- fprintf(stderr, " speed: %.2LfMiB/s\n", ((1.L * length) / (1L << 20)) / total_time);
-}
-
-} // namespace psascan_private
-
-
-// The main function.
-void pSAscan(std::string input_filename, std::string output_filename,
- std::string gap_filename, long ram_use, long max_threads, bool verbose) {
- psascan_private::pSAscan(input_filename, output_filename,
- gap_filename, ram_use, max_threads, verbose);
-}
-
-#endif // __PSASCAN_SRC_PSASCAN_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/rank.h b/exttools/pSAscan-0.1.0/src/psascan_src/rank.h
deleted file mode 100644
index 777a6251..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/rank.h
+++ /dev/null
@@ -1,778 +0,0 @@
-/**
- * @file src/psascan_src/rank.h
- * @author Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * A general rank data structure. Basic idea of the encoding is from
- * the rank data structure used in the external-memory algorithm for
- * constructing the Burrows-Wheeler transform called bwtdisk (available
- * at: http://people.unipmn.it/manzini/bwtdisk/) described in [1]. We
- * extended the data structure by applying the fixed block boosting [2]
- * and alphabet partitioning [3] techniques. The resulting data structure
- * was described in [4]. This file extends the implementation used in [4]
- * by parallelizing the construction and introducting an alternative
- * encoding (called type-I in the code). Type-I encoding is a novel
- * encoding due to present authors.
- *
- * References:
- * [1] Paolo Ferragina, Travis Gagie, Giovanni Manzini:
- * Lightweight Data Indexing and Compression in External Memory.
- * Algorithmica 63(3), p. 707-730 (2012).
- * [2] Juha Karkkainen, Simon J. Puglisi:
- * Fixed Block Compression Boosting in FM-Indexes.
- * In Proc. SPIRE 2011, p. 174-184.
- * [3] Jeremy Barbay, Travis Gagie, Gonzalo Navarro, Yakov Nekrich:
- * Alphabet Partitioning for Compressed Rank/Select and Applications.
- * In Proc. ISAAC 2010, p. 315-326.
- * [4] Juha Karkkainen, Dominik Kempa:
- * Engineering a Lightweight External Memory Suffix Array Construction
- * Algorithm.
- * In Proc. ICABD 2014, p. 53-60.
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_RANK_H_INCLUDED
-#define __PSASCAN_SRC_RANK_H_INCLUDED
-
-#include <algorithm>
-#include <vector>
-#include <thread>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-template<
- unsigned k_sblock_size_log = 24,
- unsigned k_cblock_size_log = 20,
- unsigned k_sigma_log = 8>
-class rank4n {
- private:
- static const unsigned long k_cblock_size;
- static const unsigned long k_cblock_size_mask;
- static const unsigned long k_cblock_size_mask_neg;
- static const unsigned k_cblocks_in_sblock_log;
- static const unsigned k_cblocks_in_sblock;
- static const unsigned k_cblocks_in_sblock_mask;
- static const unsigned k_2cblock_size;
- static const unsigned k_2cblock_size_mask;
- static const unsigned k_sblock_size;
- static const unsigned k_sblock_size_mask;
- static const unsigned k_sigma;
- static const unsigned k_sigma_mask;
-
- static const unsigned k_char_type_freq = 0x01;
- static const unsigned k_char_type_rare = 0x02;
- static const unsigned k_char_type_missing = 0x03;
-
- unsigned long m_length; // length of original sequence
- unsigned long n_cblocks; // number of context blocks
- unsigned long n_sblocks; // number of super blocks
-
- unsigned long *m_sblock_header;
- unsigned long *m_cblock_header;
- unsigned long *m_cblock_header2;
-
- unsigned char *m_cblock_type;
- unsigned char *m_cblock_mapping;
-
- unsigned *m_freq_trunk;
- unsigned *m_rare_trunk;
-
- public:
- unsigned long *m_count; // symbol counts
-
- public:
- rank4n(const unsigned char *text, unsigned long length, unsigned max_threads) {
- m_length = length;
- n_cblocks = (m_length + k_cblock_size - 1) / k_cblock_size;
- n_sblocks = (n_cblocks + k_cblocks_in_sblock - 1) / k_cblocks_in_sblock;
-
- m_count = (unsigned long *)malloc(256L * sizeof(unsigned long));
- std::fill(m_count, m_count + 256, 0UL);
- if (!m_length) return;
-
- m_sblock_header = (unsigned long *)malloc(n_sblocks * sizeof(unsigned long) * k_sigma);
- m_cblock_header = (unsigned long *)malloc(n_cblocks * sizeof(unsigned long));
- m_cblock_header2 = (unsigned long *)malloc(n_cblocks * k_sigma * sizeof(unsigned long));
- m_cblock_mapping = (unsigned char *)malloc(n_cblocks * k_sigma * 2);
- m_cblock_type = (unsigned char *)malloc((n_cblocks + 7) / 8);
- m_freq_trunk = (unsigned *)calloc(n_cblocks * k_cblock_size, sizeof(unsigned));
- std::fill(m_cblock_type, m_cblock_type + (n_cblocks + 7) / 8, 0);
-
- encode_type_I(text, max_threads);
- encode_type_II(text, max_threads);
-
- m_count[0] -= n_cblocks * k_cblock_size - m_length; // remove extra zeros
- }
-
- void encode_type_I(const unsigned char *text, long max_threads) {
- //------------------------------------------------------------------------
- // STEP 1: split all cblocks into equal size ranges (except possible the
- // last one). Each range is processed by one thread. During this
- // step we compute: (i) type of each cblock, (ii) encode all
- // type-I cblocks and for all type-II cblocks, we compute and
- // store: symbol mapping, symbol type (freq / rare / non-occurring)
- // and values of freq_cnt_log and rare_cnt_log.
- //------------------------------------------------------------------------
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads;
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size;
-
- unsigned long *rare_trunk_size = new unsigned long[n_cblocks];
- std::fill(rare_trunk_size, rare_trunk_size + n_cblocks, 0);
-
- bool *cblock_type = new bool[n_cblocks];
- std::fill(cblock_type, cblock_type + n_cblocks, 0);
-
- unsigned **occ = (unsigned **)malloc(n_ranges * sizeof(unsigned *));
- for (unsigned long i = 0; i < n_ranges; ++i)
- occ[i] = (unsigned *)malloc((k_cblock_size + 1) * sizeof(unsigned));
-
- std::thread **threads = new std::thread*[n_ranges];
- for (unsigned long i = 0; i < n_ranges; ++i) {
- unsigned long range_beg = i * range_size;
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks);
-
- threads[i] = new std::thread(encode_type_I_aux, std::ref(*this),
- text, range_beg, range_end, rare_trunk_size, cblock_type, occ[i]);
- }
-
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join();
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i];
- delete[] threads;
-
- for (unsigned long i = 0; i < n_ranges; ++i)
- free(occ[i]);
- free(occ);
-
- //------------------------------------------------------------------------
- // STEP 2: compute global information based on local cblock computation:
- // * store cblock types,
- // * total size of rare trunk,
- // * pointers to the beginning of each rare trunk,
- // * cumulative counts of all symbols,
- // * non-inclusive partial sum over cblock range counts.
- //------------------------------------------------------------------------
- unsigned long rare_trunk_total_size = 0;
- for (unsigned long cblock_id = 0; cblock_id < n_cblocks; ++cblock_id) {
- unsigned long cblock_beg = (cblock_id << k_cblock_size_log);
-
- // 1
- // Store cblock type.
- if (cblock_type[cblock_id])
- m_cblock_type[cblock_id >> 3] |= (1 << (cblock_id & 7));
-
- // 2
- // Compute the pointer to rare trunk and update total rare trunk size.
- unsigned long this_cblock_rare_trunk_size = rare_trunk_size[cblock_id];
- m_cblock_header[cblock_id] |= (rare_trunk_total_size << 16);
- rare_trunk_total_size += this_cblock_rare_trunk_size;
-
- // 3
- // Update cblock header.
- unsigned long cblock_header_beg = (cblock_id << k_sigma_log);
- for (unsigned c = 0; c < k_sigma; ++c)
- m_cblock_header2[cblock_header_beg + c] |= (m_count[c] << (k_cblock_size_log + 6));
-
- // 4
- // Update sblock header,
- if (!(cblock_beg & k_sblock_size_mask)) {
- unsigned long sblock_id = (cblock_beg >> k_sblock_size_log);
- unsigned long sblock_header_beg = (sblock_id << k_sigma_log);
- for (unsigned c = 0; c < k_sigma; ++c)
- m_sblock_header[sblock_header_beg + c] = m_count[c];
- }
-
- // 5
- // Update m_count.
- unsigned long ptr = (cblock_id << k_sigma_log);
- for (unsigned c = 0; c + 1 < k_sigma; ++c)
- m_count[c] += ((m_cblock_header2[ptr + c + 1] >> 5) & k_2cblock_size_mask) -
- ((m_cblock_header2[ptr + c] >> 5) & k_2cblock_size_mask);
- m_count[k_sigma - 1] += k_cblock_size -
- ((m_cblock_header2[ptr + k_sigma - 1] >> 5) & k_2cblock_size_mask);
- }
- m_rare_trunk = (unsigned *)calloc(rare_trunk_total_size, sizeof(unsigned));
-
- delete[] cblock_type;
- delete[] rare_trunk_size;
- }
-
- static void encode_type_I_aux(rank4n &r, const unsigned char *text,
- unsigned long cblock_range_beg, unsigned long cblock_range_end,
- unsigned long *rare_trunk_size, bool *cblock_type, unsigned *occ) {
- std::vector<std::pair<uint32_t, unsigned char> > sorted_chars;
- std::vector<unsigned char> freq_chars;
- std::vector<unsigned char> rare_chars;
-
- unsigned *refpoint_precomputed = (unsigned *)malloc(k_cblock_size * sizeof(unsigned));
- unsigned *cblock_count = new unsigned[k_sigma];
- unsigned *list_beg = new unsigned[k_sigma];
- unsigned *list_beg2 = new unsigned[k_sigma];
- bool *isfreq = new bool[k_sigma];
- unsigned *lookup_bits_precomputed = new unsigned[k_sigma];
- unsigned *min_block_size_precomputed = new unsigned[k_sigma];
- unsigned long *refpoint_mask_precomputed = new unsigned long[k_sigma];
-
- // Process cblocks one by one.
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) {
- unsigned long cblock_beg = cblock_id << k_cblock_size_log;
- unsigned long cblock_end = cblock_beg + k_cblock_size;
-
- // Compute symbol counts inside cblock.
- std::fill(cblock_count, cblock_count + k_sigma, 0);
- unsigned long maxj = std::min(cblock_end, r.m_length);
- for (unsigned long j = cblock_beg; j < maxj; ++j)
- ++cblock_count[text[j]];
- cblock_count[0] += cblock_end - maxj;
-
- // Compute starting positions of occurrences lists.
- for (unsigned j = 0, t, s = 0; j < k_sigma; ++j) {
- t = cblock_count[j];
- list_beg[j] = s;
- list_beg2[j] = s;
- s += t;
- }
-
- // Store pointers to beginnings of occurrence lists in the type-I
- // cblock header. Note: this implicitly encodes cblock counts.
- for (unsigned c = 0; c < k_sigma; ++c)
- r.m_cblock_header2[(cblock_id << k_sigma_log) + c] = (list_beg[c] << 5);
-
- // Sort symbol counts by frequencies.
- sorted_chars.clear();
- for (unsigned j = 0; j < k_sigma; ++j)
- if (cblock_count[j])
- sorted_chars.push_back(std::make_pair(cblock_count[j], j));
- std::sort(sorted_chars.begin(), sorted_chars.end());
-
- // Separate (at most, due to rounding of freq_cnt)
- // about 3% of rarest symbols.
- unsigned rare_cnt = 0L, rare_sum = 0L;
- while (rare_cnt < sorted_chars.size() &&
- 16L * (rare_sum + sorted_chars[rare_cnt].first) <= k_cblock_size)
- rare_sum += sorted_chars[rare_cnt++].first;
-
- // Compute freq_cnt. Then round up freq_cnt + 1 (+1 is
- // for rare char marker) to the smallest power of two.
- // Note: rare_cnt > 0, so after rounding freq_cnt <= 256.
- unsigned freq_cnt = sorted_chars.size() - rare_cnt;
- unsigned freq_cnt_log = utils::log2ceil(freq_cnt + 1);
- freq_cnt = (1 << freq_cnt_log);
-
- // Recompute rare_cnt (note the +1).
- rare_cnt = 0;
- if (sorted_chars.size() + 1 > freq_cnt)
- rare_cnt = sorted_chars.size() + 1 - freq_cnt;
-
- // Compute freq and rare chars.
- rare_chars.clear();
- freq_chars.clear();
- for (unsigned i = 0; i < rare_cnt; ++i)
- rare_chars.push_back(sorted_chars[i].second);
- for (unsigned i = rare_cnt; i < sorted_chars.size(); ++i)
- freq_chars.push_back(sorted_chars[i].second);
-
- // If there are rare symbols, round up
- // rare_cnt to the smallest power of two.
- unsigned rare_cnt_log = 0;
- if (rare_cnt) {
- rare_cnt_log = utils::log2ceil(rare_cnt);
- rare_cnt = (1 << rare_cnt_log);
- }
-
- // Update cblock type-I header.
- r.m_cblock_header[cblock_id] = freq_cnt_log;
- r.m_cblock_header[cblock_id] |= (rare_cnt_log << 8);
-
- // Compute and store symbols mapping.
- std::sort(freq_chars.begin(), freq_chars.end());
- std::sort(rare_chars.begin(), rare_chars.end());
- std::fill(isfreq, isfreq + 256, false);
- for (unsigned c = 0; c < 256; ++c)
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_missing;
- for (unsigned i = 0; i < freq_chars.size(); ++i) {
- unsigned char c = freq_chars[i];
- isfreq[c] = true;
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i;
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_freq;
- }
- for (unsigned i = 0; i < rare_chars.size(); ++i) {
- unsigned char c = rare_chars[i];
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1] = i;
- r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)] = k_char_type_rare;
- }
-
- unsigned nofreq_cnt = 0L;
- for (unsigned c = 0; c < k_sigma; ++c)
- if (!isfreq[c]) nofreq_cnt += cblock_count[c];
-
-
- if (freq_cnt >= 128) { // type-I cblock
- cblock_type[cblock_id] = true;
-
- // Compute lists of occurrences.
- for (unsigned long i = cblock_beg; i < maxj; ++i)
- occ[list_beg2[text[i]]++] = i - cblock_beg;
- for (unsigned long i = maxj; i < cblock_end; ++i)
- occ[list_beg2[0]++] = i - cblock_beg;
-
- // Precompute helper arrays and and store lookup bits into the header.
- for (unsigned c = 0; c < k_sigma; ++c) {
- lookup_bits_precomputed[c] = utils::log2ceil(cblock_count[c] + 2);
- r.m_cblock_header2[(cblock_id << 8) + c] |= lookup_bits_precomputed[c];
- if (cblock_count[c])
- min_block_size_precomputed[c] = k_cblock_size / cblock_count[c];
- else min_block_size_precomputed[c] = 0;
-
- unsigned refpoint_dist_log = 31 - lookup_bits_precomputed[c];
- unsigned long refpoint_dist = (1UL << refpoint_dist_log);
- unsigned long refpoint_dist_mask = refpoint_dist - 1;
- unsigned long refpoint_dist_mask_neg = (~refpoint_dist_mask);
- refpoint_mask_precomputed[c] = refpoint_dist_mask_neg;
- }
-
- // Actual encoding follows.
- unsigned *cblock_trunk = r.m_freq_trunk + cblock_beg;
- for (unsigned c = 0; c < k_sigma; ++c) {
- unsigned freq = cblock_count[c];
- unsigned min_block_size = min_block_size_precomputed[c];
- unsigned lookup_bits = lookup_bits_precomputed[c];
- unsigned refpoint_dist_mask_neg = refpoint_mask_precomputed[c];
- unsigned c_list_beg = list_beg[c];
-
- for (unsigned j = 0; j < freq; ++j)
- cblock_trunk[c_list_beg + j] = freq + 1;
- if (freq) cblock_trunk[c_list_beg + freq - 1] = freq;
-
- unsigned block_beg = 0;
- for (unsigned j = 0; j < freq; ++j) {
- refpoint_precomputed[j] = (block_beg & refpoint_dist_mask_neg);
- block_beg += min_block_size;
- if ((((unsigned long)block_beg * freq) >> k_cblock_size_log) == j) ++block_beg;
- }
-
- unsigned refpoint, block_id;
- unsigned mask = (~((1UL << lookup_bits) - 1));
- if (freq) {
- for (long j = freq - 1; j >= 0; --j) {
- block_id = (((unsigned long)occ[c_list_beg + j] * freq) >> k_cblock_size_log);
- refpoint = refpoint_precomputed[block_id];
- cblock_trunk[c_list_beg + block_id] &= mask;
- cblock_trunk[c_list_beg + block_id] |= (unsigned)j;
- cblock_trunk[c_list_beg + j] |= ((occ[c_list_beg + j] - refpoint) << lookup_bits);
- }
- }
- }
- } else {
- // Update rare_trunk_size.
- if (rare_cnt) {
- long rare_blocks = 1 + (nofreq_cnt + rare_cnt - 1) / rare_cnt;
- rare_trunk_size[cblock_id] = rare_blocks * rare_cnt;
- }
- }
- }
-
- // Clean up.
- delete[] list_beg;
- delete[] list_beg2;
- delete[] isfreq;
- delete[] cblock_count;
- delete[] lookup_bits_precomputed;
- delete[] min_block_size_precomputed;
- delete[] refpoint_mask_precomputed;
- free(refpoint_precomputed);
- }
-
- void encode_type_II(const unsigned char *text, long max_threads) {
- unsigned long range_size = (n_cblocks + max_threads - 1) / max_threads;
- unsigned long n_ranges = (n_cblocks + range_size - 1) / range_size;
-
- std::thread **threads = new std::thread*[n_ranges];
- for (unsigned long i = 0; i < n_ranges; ++i) {
- unsigned long range_beg = i * range_size;
- unsigned long range_end = std::min(range_beg + range_size, n_cblocks);
-
- threads[i] = new std::thread(encode_type_II_aux,
- std::ref(*this), text, range_beg, range_end);
- }
-
- for (unsigned long i = 0; i < n_ranges; ++i) threads[i]->join();
- for (unsigned long i = 0; i < n_ranges; ++i) delete threads[i];
- delete[] threads;
- }
-
- static void encode_type_II_aux(rank4n &r, const unsigned char *text,
- unsigned long cblock_range_beg, unsigned long cblock_range_end) {
- unsigned char *freq_map = new unsigned char[k_sigma];
- unsigned char *rare_map = new unsigned char[k_sigma];
- unsigned long *cur_count = new unsigned long[k_sigma];
- unsigned long *off = new unsigned long[k_sigma];
-
- long *sblock_h = new long[k_sigma];
- int *israre = new int[k_sigma];
-
- std::vector<unsigned char> freq_chars;
- std::vector<unsigned char> rare_chars;
-
- for (unsigned long cblock_id = cblock_range_beg; cblock_id < cblock_range_end; ++cblock_id) {
- unsigned long cblock_beg = cblock_id << k_cblock_size_log;
- unsigned long cblock_end = cblock_beg + k_cblock_size;
-
- // Skip the cblock if it was type-I encoded.
- if (r.m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) continue;
-
- // Retreive symbol counts up to this cblock begin and
- // pointer to rare trunk size from cblock headers.
- for (unsigned c = 0; c < k_sigma; ++c)
- cur_count[c] = (r.m_cblock_header2[(cblock_id << 8) + c] >> (k_cblock_size_log + 6));
-
- long r_filled = (r.m_cblock_header[cblock_id] >> 16);
- long r_ptr = r_filled;
-
- long freq_cnt_log = (r.m_cblock_header[cblock_id] & 255L);
- long rare_cnt_log = ((r.m_cblock_header[cblock_id] >> 8) & 255L);
- long freq_cnt = (1L << freq_cnt_log);
- long rare_cnt = (1L << rare_cnt_log);
- long rare_cnt_mask = rare_cnt - 1;
-
- freq_chars.clear();
- rare_chars.clear();
- std::fill(israre, israre + k_sigma, 1);
- for (unsigned c = 0; c < k_sigma; ++c) {
- unsigned char type = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id)];
- if (type == k_char_type_freq) {
- israre[c] = 0;
- freq_chars.push_back(c);
- freq_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1];
- } else if (type == k_char_type_rare) {
- rare_chars.push_back(c);
- rare_map[c] = r.m_cblock_mapping[2 * (c * r.n_cblocks + cblock_id) + 1];
- freq_map[c] = freq_cnt - 1;
- }
- }
-
- if (rare_chars.empty()) {
- rare_cnt_log = 0;
- rare_cnt = 0;
- }
-
- long sblock_id = (cblock_beg >> k_sblock_size_log);
- std::copy(r.m_sblock_header + (sblock_id << 8), r.m_sblock_header + (sblock_id << 8) + k_sigma, sblock_h);
- for (long j = 0; j < k_sigma; ++j) off[j] = cur_count[j] - sblock_h[j];
-
- long nofreq_cnt = 0;
- long freq_chars_size = (long)freq_chars.size();
- long rare_chars_size = (long)rare_chars.size();
-
- if (cblock_end <= r.m_length) {
- for (unsigned long i = cblock_beg; i < cblock_end; i += freq_cnt) {
- for (long j = 0; j < freq_chars_size; ++j) {
- unsigned char ch = freq_chars[j];
- r.m_freq_trunk[i + j] = (off[ch] << 8);
- }
- r.m_freq_trunk[i + freq_cnt - 1] = (nofreq_cnt << 8);
- for (unsigned long j = i; j < i + freq_cnt; ++j) {
- unsigned char c = text[j];
- r.m_freq_trunk[j] |= freq_map[c];
- if (israre[c]) {
- if (!(nofreq_cnt & rare_cnt_mask)) {
- for (long jj = 0; jj < rare_chars_size; ++jj) {
- unsigned char ch = rare_chars[jj];
- r.m_rare_trunk[r_filled++] = (off[ch] << 8);
- }
- r_filled += rare_cnt - rare_chars_size;
- }
- r.m_rare_trunk[r_ptr++] |= rare_map[c];
- }
- ++off[c];
- nofreq_cnt += israre[c];
- }
- }
- for (long i = 0; i < k_sigma; ++i)
- cur_count[i] = sblock_h[i] + off[i];
- } else {
- for (unsigned long i = cblock_beg; i < cblock_end; i += freq_cnt) {
- for (long j = 0; j < freq_chars_size; ++j) {
- unsigned char ch = freq_chars[j];
- r.m_freq_trunk[i + j] = (off[ch] << 8);
- }
- r.m_freq_trunk[i + freq_cnt - 1] = (nofreq_cnt << 8);
- for (unsigned long j = i; j < i + freq_cnt; ++j) {
- unsigned char c = (j < r.m_length ? text[j] : 0);
- r.m_freq_trunk[j] |= freq_map[c];
- if (israre[c]) {
- if (!(nofreq_cnt & rare_cnt_mask)) {
- for (long jj = 0; jj < rare_chars_size; ++jj) {
- unsigned char ch = rare_chars[jj];
- r.m_rare_trunk[r_filled++] = (off[ch] << 8);
- }
- r_filled += rare_cnt - rare_chars_size;
- }
- r.m_rare_trunk[r_ptr++] |= rare_map[c];
- }
- ++off[c];
- nofreq_cnt += israre[c];
- }
- }
- for (long i = 0; i < k_sigma; ++i)
- cur_count[i] = sblock_h[i] + off[i];
- }
-
- for (long j = 0; j < rare_cnt; ++j) {
- unsigned char ch = (j < (long)rare_chars.size() ? rare_chars[j] : 0);
- long local_rank = cur_count[ch] - r.m_sblock_header[(sblock_id << 8) + ch];
- r.m_rare_trunk[r_filled++] = (local_rank << 8);
- }
- }
-
- delete[] cur_count;
- delete[] sblock_h;
- delete[] freq_map;
- delete[] rare_map;
- delete[] israre;
- delete[] off;
- }
-
- inline long rank(long i, unsigned char c) const {
- if (i <= 0) return 0L;
- else if ((unsigned long)i >= m_length) return m_count[c];
-
- unsigned long cblock_id = (i >> k_cblock_size_log);
- if (m_cblock_type[cblock_id >> 3] & (1 << (cblock_id & 7))) { // type-I cblock
- long cblock_beg = (i & k_cblock_size_mask_neg);
- long cblock_i = (i & k_cblock_size_mask); // offset in cblock
-
- // Extract the rank up to the start of cblock.
- long rank_up_to_cblock = (m_cblock_header2[(cblock_id << k_sigma_log) + c] >> (k_cblock_size_log + 6));
-
- // Now we compute the number of occurrences of c inside the cblock.
- // First, decode the beginning and end of c's occurrence list.
- long list_beg = ((m_cblock_header2[(cblock_id << k_sigma_log) + c] >> 5) & k_2cblock_size_mask);
- long list_end = ((c == k_sigma - 1) ? k_cblock_size :
- ((m_cblock_header2[(cblock_id << k_sigma_log) + c + 1] >> 5) & k_2cblock_size_mask));
- if (list_beg == list_end) return rank_up_to_cblock;
-
- // Compute the distance from i to the closest reference point on the left.
- long lookup_bits = (m_cblock_header2[(cblock_id << k_sigma_log) + c] & 31);
- long refpoint_dist_log = 31 - lookup_bits;
- long refpoint_disk_mask = (1L << refpoint_dist_log) - 1;
- long i_refpoint_offset = (cblock_i & refpoint_disk_mask);
-
- // Compute threshold of symbol c inside the current cblock.
- long threshold = (1L << (k_cblock_size_log - lookup_bits + 1));
-
- // Compute the id of block containing i.
- long list_size = list_end - list_beg;
- long approx = ((cblock_i * list_size) >> k_cblock_size_log);
-
- // Extract the lookup table entry.
- long lookup_mask = (1L << lookup_bits) - 1;
- long begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask);
-
- // Empty block optimization.
- if (begin == list_size + 1) {
- // Block containing cblock_i is empty, just find the beginning.
- ++approx;
- while ((m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask) == list_size + 1) ++approx;
- begin = (m_freq_trunk[cblock_beg + list_beg + approx] & lookup_mask);
- return rank_up_to_cblock + begin;
- }
-
- long next_block_begin = (approx + 1 == list_size) ? list_size :
- (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask);
-
- // Correct next_block_begin.
- if (approx + 1 != list_size && next_block_begin == list_size + 1) {
- ++approx;
- while ((m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask) == list_size + 1) ++approx;
- next_block_begin = (m_freq_trunk[cblock_beg + list_beg + approx + 1] & lookup_mask);
- }
-
- // Correct the value of begin and return the answer.
- if (i_refpoint_offset >= threshold) {
- // Case 1: easy case, will happen most of the time.
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset)
- ++begin;
-
- return rank_up_to_cblock + begin;
- } else {
- // Case 2: executed very rarely.
- if (begin == next_block_begin || (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < (2 * threshold)) {
- // Case 2a: the value in the occ list was small -> the ref
- // point for i and for the block are the same, we
- // proceed as before, without modifying i_refpoint_offset.
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset)
- ++begin;
-
- return rank_up_to_cblock + begin;
- } else {
- // Case 2b: block occurrences were encoded wrt to the
- // previous ref point -> we increase i_refpoint_offset
- // by refpoint_dist and proceed as before.
- i_refpoint_offset += (1L << refpoint_dist_log);
- while (begin < next_block_begin && (m_freq_trunk[cblock_beg + list_beg + begin] >> lookup_bits) < i_refpoint_offset)
- ++begin;
-
- return rank_up_to_cblock + begin;
- }
- }
- } else { // type-II cblock
- long sblock_id = (i >> k_sblock_size_log);
- long sblock_rank = m_sblock_header[(sblock_id << 8) + c];
-
- unsigned char type = m_cblock_mapping[2 * (c * n_cblocks + cblock_id)];
- unsigned char c_map = m_cblock_mapping[2 * (c * n_cblocks + cblock_id) + 1];
-
- long freq_cnt_bits = (m_cblock_header[cblock_id] & 255L);
- long rare_cnt_bits = ((m_cblock_header[cblock_id] >> 8) & 255L);
- long block_id = (i >> freq_cnt_bits);
-
- if (type == k_char_type_freq) {
- // Case 1 (fastest): symbol c was frequent in the context block.
- // Answer a query using frequent trunk.
- long block_rank = m_freq_trunk[(block_id << freq_cnt_bits) + c_map] >> 8;
- long extra = 0;
- for (long j = (block_id << freq_cnt_bits); j < i; ++j)
- if ((m_freq_trunk[j] & 255) == c_map) ++extra;
-
- return sblock_rank + block_rank + extra;
- } else if (type == k_char_type_rare) {
- // Case 2: symbol c was rare inside the context block.
- // Compute new_i.
- long rare_trunk_ptr = (m_cblock_header[cblock_id] >> 16);
- long new_i = m_freq_trunk[((block_id + 1) << freq_cnt_bits) - 1] >> 8;
- for (long j = (block_id << freq_cnt_bits); j < i; ++j)
- if ((m_freq_trunk[j] & 255) + 1 == (1U << freq_cnt_bits)) ++new_i;
-
- // Answer a query on rare trunk.
- long rare_block_id = (new_i >> rare_cnt_bits);
- long block_rank = m_rare_trunk[rare_trunk_ptr +
- (rare_block_id << rare_cnt_bits) + c_map] >> 8;
- long extra = 0;
- for (long j = (rare_block_id << rare_cnt_bits); j < new_i; ++j)
- if ((m_rare_trunk[rare_trunk_ptr + j] & 255) == c_map) ++extra;
-
- return sblock_rank + block_rank + extra;
- } else {
- // Case 3: symbol c does not occur in the context block.
- // Find the first cblock where c occurrs.
- while (cblock_id < n_cblocks && (cblock_id & k_cblocks_in_sblock_mask) &&
- m_cblock_mapping[2 * (c * n_cblocks + cblock_id)] == k_char_type_missing)
- ++cblock_id;
-
- if (cblock_id == n_cblocks) {
- // We reached the end of encoding, return count[c].
- return m_count[c];
- } else if (!(cblock_id & k_cblocks_in_sblock_mask)) {
- // We reached the boundary of superblock,
- // retreive the answer from superblock header.
- return m_sblock_header[256 * (cblock_id >> k_cblocks_in_sblock_log) + c];
- } else {
- // We found cblock where c occurrs, but it wasn't on the
- // sblock boundary. In the recursive call this will either
- // be case 1 or case 2.
- return rank(cblock_id << k_cblock_size_log, c);
- }
- }
- }
- }
-
- ~rank4n() {
- if (m_length) {
- free(m_sblock_header);
- free(m_cblock_header);
- free(m_cblock_header2);
- free(m_cblock_mapping);
- free(m_cblock_type);
- free(m_freq_trunk);
- free(m_rare_trunk);
- }
- free(m_count);
- }
-};
-
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned long rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblock_size = (1L << k_cblock_size_log);
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned long rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblock_size_mask = (1L << k_cblock_size_log) - 1;
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_2cblock_size = (2 << k_cblock_size_log);
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_2cblock_size_mask = (2 << k_cblock_size_log) - 1;
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sigma = (1 << k_sigma_log);
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sigma_mask = (1 << k_sigma_log) - 1;
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned long rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblock_size_mask_neg = ~((1L << k_cblock_size_log) - 1);
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblocks_in_sblock_log = k_sblock_size_log - k_cblock_size_log;
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblocks_in_sblock = (1 << (k_sblock_size_log - k_cblock_size_log));
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_cblocks_in_sblock_mask = (1 << (k_sblock_size_log - k_cblock_size_log)) - 1;
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sblock_size = (1 << k_sblock_size_log);
-
-template<unsigned k_sblock_size_log, unsigned k_cblock_size_log, unsigned k_sigma_log>
- const unsigned rank4n<k_sblock_size_log, k_cblock_size_log, k_sigma_log>
- ::k_sblock_size_mask = (1 << k_sblock_size_log) - 1;
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_RANK_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/ranksel_support.h b/exttools/pSAscan-0.1.0/src/psascan_src/ranksel_support.h
deleted file mode 100644
index debe9877..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/ranksel_support.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/**
- * @file src/psascan_src/ranksel_support.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_RANKSEL_SUPPORT_H_INCLUDED
-#define __PSASCAN_SRC_RANKSEL_SUPPORT_H_INCLUDED
-
-#include <thread>
-#include <algorithm>
-
-#include "bitvector.h"
-
-
-namespace psascan_private {
-
-struct ranksel_support {
- //============================================================================
- // Compute sparse_rank[group_beg..group_end).
- //============================================================================
- static void process_group_of_chunks(long group_beg, long group_end,
- long chunk_size, long *sparse_rank, const bitvector *bv) {
- for (long chunk_id = group_beg; chunk_id < group_end; ++chunk_id) {
- long chunk_beg = chunk_id * chunk_size;
- long chunk_end = chunk_beg + chunk_size;
-
- sparse_rank[chunk_id] = bv->range_sum(chunk_beg, chunk_end);
- }
- }
-
-
- //============================================================================
- // Constructor.
- //============================================================================
- ranksel_support(const bitvector *bv, long length, long max_threads) {
- m_bv = bv;
- m_length = length;
-
- // 1
- //
- // Compute chunk size and allocate m_sparse_rank.
- m_chunk_size = std::min((1L << 20), (m_length + max_threads - 1) / max_threads);
- n_chunks = m_length / m_chunk_size; // we exclude the last partial chunk
- m_sparse_rank = (long *)malloc((n_chunks + 1) * sizeof(long));
-
- // 2
- //
- // Compute the sum of 1-bits inside each chunk and write to m_sparse_rank.
- // Since there can be more chunks than threads, we split chunks
- // into groups and let each thread handle the group of chunks.
- long chunk_max_group_size = (n_chunks + max_threads - 1) / max_threads;
- long n_chunk_groups = (n_chunks + chunk_max_group_size - 1) / chunk_max_group_size;
-
- std::thread **threads = new std::thread*[n_chunk_groups];
- for (long t = 0; t < n_chunk_groups; ++t) {
- long chunk_group_beg = t * chunk_max_group_size;
- long chunk_group_end = std::min(chunk_group_beg + chunk_max_group_size, n_chunks);
- threads[t] = new std::thread(process_group_of_chunks, chunk_group_beg,
- chunk_group_end, m_chunk_size, m_sparse_rank, m_bv);
- }
-
- for (long t = 0; t < n_chunk_groups; ++t) threads[t]->join();
- for (long t = 0; t < n_chunk_groups; ++t) delete threads[t];
- delete[] threads;
-
- // 3
- //
- // Compute partial (exclusive) sum on m_sparse_rank.
- long ones = 0L;
- for (long i = 0; i < n_chunks; ++i) {
- long temp = m_sparse_rank[i];
- m_sparse_rank[i] = ones;
- ones += temp;
- }
- m_sparse_rank[n_chunks] = ones;
- }
-
-
- //============================================================================
- // Find the largest position j such that the number of 0s in bv[0..j) is <= i.
- // In other words, find the position of i-th 0-bit in bv (i = 0, 1, ..).
- // 0 <= i < number of 0-bits in bv.
- //============================================================================
- inline long select0(long i) const {
- // Fast-forward through chunks preceding the chunk with the answer.
- long j = 0L;
- while (j < n_chunks && ((j + 1) * m_chunk_size) - m_sparse_rank[j + 1] <= i)
- ++j;
-
- long zero_cnt_j = (j * m_chunk_size) - m_sparse_rank[j];
- j *= m_chunk_size;
-
- // Find the final position in a single chunk.
- while (zero_cnt_j + (1 - m_bv->get(j)) <= i)
- zero_cnt_j += (1 - m_bv->get(j++));
-
- return j;
- }
-
-
- //============================================================================
- // Find the largest position j such that the number of 1s in bv[0..j) is <= i.
- // In other words, find the position of i-th 1-bit in bv (i = 0, 1, ..).
- // 0 <= i < number of 1-bits in bv.
- //============================================================================
- inline long select1(long i) const {
- // Fast-forward through chunks preceding the chunk with the answer.
- long j = 0L;
- while (j < n_chunks && m_sparse_rank[j + 1] <= i)
- ++j;
-
- long rank_j = m_sparse_rank[j];
- j *= m_chunk_size;
-
- // Find the final position in a single chunk.
- while (rank_j + m_bv->get(j) <= i)
- rank_j += m_bv->get(j++);
-
- return j;
- }
-
- //============================================================================
- // Compute the number of 1-bits in bv[0..i) with the help of sparse_rank.
- // Note:
- // - i is an integer in the range from 0 to length of bv (inclusive),
- // - sparse_rank[k] = number of 1-bits in bv[0..k * chunk_size),
- //============================================================================
- inline long rank(long i) const {
- long j = i / m_chunk_size;
- long result = m_sparse_rank[j];
- j *= m_chunk_size;
-
- while (j < i)
- result += m_bv->get(j++);
-
- return result;
- }
-
-
- //============================================================================
- // Compute the number of 0-bits in bv[0..i).
- // 0 <= i <= m_length.
- //============================================================================
- inline long rank0(long i) const {
- return i - rank(i);
- }
-
-
- ~ranksel_support() {
- free(m_sparse_rank);
- }
-
- long m_length; // length of bitvector
- long m_chunk_size; // chunk size
- long n_chunks; // number of chunks
- long *m_sparse_rank;
-
- const bitvector *m_bv;
-};
-
-} // psascan_private
-
-#endif // __PSASCAN_SRC_RANKSEL_SUPPORT_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/sparse_isa.h b/exttools/pSAscan-0.1.0/src/psascan_src/sparse_isa.h
deleted file mode 100644
index 3d872e1e..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/sparse_isa.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * @file src/psascan_src/sparse_isa.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section DESCRIPTION
- *
- * Sparse ISA encoding based on the ISAs algorithm computing
- * Lempel-Ziv (LZ77) factorization described in
- *
- * Dominik Kempa, Simon J. Puglisi:
- * Lempel-Ziv factorization: Simple, fast, practical.
- * In Proc. ALENEX 2013, p. 103-112.
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_SPARSE_ISA_H_INCLUDED
-#define __PSASCAN_SRC_SPARSE_ISA_H_INCLUDED
-
-#include <algorithm>
-#include <thread>
-
-
-namespace psascan_private {
-
-template<typename approx_rank_type, typename saidx_t, long k_sampling_rate_log>
-struct sparse_isa {
- private:
- long m_length;
- long m_last_isa;
- long m_i0;
-
- long *m_count;
- long *m_sparse_isa;
-
- const saidx_t *m_psa;
- const unsigned char *m_text;
- const approx_rank_type *m_rank;
-
- static const long k_sampling_rate;
- static const long k_sampling_rate_mask;
- static const long k_sigma = 256;
-
- private:
- template<typename T>
- static void compute_sparse_isa_aux(const T *psa, long block_beg,
- long block_end, long psa_size, long *sparse_isa, long &last) {
- for (long j = block_beg; j < block_end; ++j) {
- long sa_j = (long)psa[j];
- if (!(sa_j & k_sampling_rate_mask))
- sparse_isa[sa_j >> k_sampling_rate_log] = j;
- if (sa_j == psa_size - 1) last = j;
- }
- }
-
- public:
- sparse_isa(const saidx_t *psa, const unsigned char *text, long length,
- long i0, const approx_rank_type *rank, long max_threads) {
- m_psa = psa;
- m_length = length;
- m_rank = rank;
- m_text = text;
- m_i0 = i0;
-
- long elems = (m_length + k_sampling_rate - 1) / k_sampling_rate + 1;
- m_sparse_isa = (long *)malloc(elems * sizeof(long));
-
- long max_block_size = (m_length + max_threads - 1) / max_threads;
- long n_blocks = (m_length + max_block_size - 1) / max_block_size;
-
- std::thread **threads = new std::thread*[n_blocks];
- for (long t = 0; t < n_blocks; ++t) {
- long block_beg = t * max_block_size;
- long block_end = std::min(block_beg + max_block_size, m_length);
-
- threads[t] = new std::thread(compute_sparse_isa_aux<saidx_t>, m_psa,
- block_beg, block_end, m_length, m_sparse_isa, std::ref(m_last_isa));
- }
-
- for (long t = 0; t < n_blocks; ++t) threads[t]->join();
- for (long t = 0; t < n_blocks; ++t) delete threads[t];
- delete[] threads;
-
- m_count = (long *)malloc(k_sigma * sizeof(long));
- std::copy(rank->m_count, rank->m_count + k_sigma, m_count);
- ++m_count[text[length - 1]];
- --m_count[0];
-
- for (long i = 0, s = 0; i < k_sigma; ++i) {
- long t = m_count[i];
- m_count[i] = s;
- s += t;
- }
- }
-
- inline long query(long j) const {
- long isa_i;
- long i = ((j + k_sampling_rate - 1) >> k_sampling_rate_log);
- if ((i << k_sampling_rate_log) < m_length) {
- isa_i = (long)m_sparse_isa[i];
- i <<= k_sampling_rate_log;
- } else {
- isa_i = m_last_isa;
- i = m_length - 1;
- }
-
- while (i != j) {
- // Compute ISA[i - 1] from ISA[i].
- // Invariant:
- // isa_i = ISA[i]
- // j <= i
- unsigned char c = m_text[i - 1];
- int delta = (isa_i > m_i0 && c == 0);
-
- isa_i = m_count[c] + m_rank->rank(isa_i, c) - delta;
- while (isa_i < 0 || (long)m_psa[isa_i] != i - 1)
- ++isa_i;
-
- --i;
- }
-
- return isa_i;
- }
-
- ~sparse_isa() {
- free(m_sparse_isa);
- free(m_count);
- }
-};
-
-template<typename approx_rank_type, typename saidx_t, long k_sampling_rate_log>
-const long sparse_isa<approx_rank_type, saidx_t, k_sampling_rate_log>
- ::k_sampling_rate = (1L << k_sampling_rate_log);
-
-template<typename approx_rank_type, typename saidx_t, long k_sampling_rate_log>
-const long sparse_isa<approx_rank_type, saidx_t, k_sampling_rate_log>
- ::k_sampling_rate_mask = (1L << k_sampling_rate_log) - 1;
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_SPARSE_ISA_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/stream.h b/exttools/pSAscan-0.1.0/src/psascan_src/stream.h
deleted file mode 100644
index fcbd1823..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/stream.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/**
- * @file src/psascan_src/stream.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_STREAM_H_INCLUDED
-#define __PSASCAN_SRC_STREAM_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <string>
-#include <mutex>
-#include <algorithm>
-
-#include "utils.h"
-#include "rank.h"
-#include "gap_buffer.h"
-#include "update.h"
-#include "stream_info.h"
-#include "multifile.h"
-#include "multifile_bit_stream_reader.h"
-#include "async_multifile_bit_stream_reader.h"
-#include "async_backward_skip_stream_reader.h"
-#include "async_bit_stream_writer.h"
-
-
-namespace psascan_private {
-
-std::mutex stdout_mutex;
-
-template<typename block_offset_type>
-void parallel_stream(
- gap_buffer_poll<block_offset_type> *full_gap_buffers,
- gap_buffer_poll<block_offset_type> *empty_gap_buffers,
- long stream_block_beg,
- long stream_block_end,
- block_offset_type i,
- const long *count,
- block_offset_type whole_suffix_rank,
- const rank4n<> *rank,
- unsigned char last,
- std::string text_filename,
- long length,
- std::string &tail_gt_filename,
- stream_info *info,
- int thread_id,
- long gap_range_size,
- long gap_buf_size,
- const multifile *tail_gt_begin,
- long n_increasers) {
-
- static const int max_buckets = 4096;
- int *block_id_to_sblock_id = new int[max_buckets];
-
- long bucket_size = 1;
- long bucket_size_bits = 0;
- while ((gap_range_size + bucket_size - 1) / bucket_size > max_buckets)
- bucket_size <<= 1, ++bucket_size_bits;
- long n_buckets = (gap_range_size + bucket_size - 1) / bucket_size;
- int *block_count = new int[n_buckets];
-
- long max_buffer_elems = gap_buf_size / sizeof(block_offset_type);
- block_offset_type *temp = new block_offset_type[max_buffer_elems];
- int *oracle = new int[max_buffer_elems];
-
- static const long buffer_sample_size = 512;
- std::vector<block_offset_type> samples(buffer_sample_size);
- long *ptr = new long[n_increasers];
- block_offset_type *bucket_lbound = new block_offset_type[n_increasers + 1];
-
- typedef async_multifile_bit_stream_reader bit_stream_reader_type;
- typedef async_backward_skip_stream_reader<unsigned char> text_reader_type;
- typedef async_bit_stream_writer bit_stream_writer_type;
-
- text_reader_type *text_streamer = new text_reader_type(text_filename, length - stream_block_end, 4L << 20);
- bit_stream_writer_type *gt_out = new bit_stream_writer_type(tail_gt_filename, 1L << 20);
- bit_stream_reader_type gt_in(tail_gt_begin, length - stream_block_end, 1L << 20);
-
- long j = stream_block_end, dbg = 0L;
- while (j > stream_block_beg) {
- if (dbg > (1 << 26)) {
- info->m_mutex.lock();
- info->m_streamed[thread_id] = stream_block_end - j;
- info->m_update_count += 1;
- if (info->m_update_count == info->m_thread_count) {
- info->m_update_count = 0L;
- long double elapsed = utils::wclock() - info->m_timestamp;
- long total_streamed = 0L;
-
- for (long t = 0; t < info->m_thread_count; ++t)
- total_streamed += info->m_streamed[t];
- long double speed = (total_streamed / (1024.L * 1024)) / elapsed;
-
- stdout_mutex.lock();
- fprintf(stderr, "\r Stream: %.2Lf%%. Time: %.2Lf. Speed: %.2LfMiB/s",
- (total_streamed * 100.L) / info->m_tostream, elapsed, speed);
- stdout_mutex.unlock();
- }
- info->m_mutex.unlock();
- dbg = 0L;
- }
-
- // Get a gap buffer from the poll of empty buffers.
- std::unique_lock<std::mutex> lk(empty_gap_buffers->m_mutex);
- while (!empty_gap_buffers->available())
- empty_gap_buffers->m_cv.wait(lk);
-
- gap_buffer<block_offset_type> *b = empty_gap_buffers->get();
- lk.unlock();
- empty_gap_buffers->m_cv.notify_one(); // let others know they should re-check
-
- // Process buffer -- fill with gap values.
- long left = j - stream_block_beg;
- b->m_filled = std::min(left, b->m_size);
- dbg += b->m_filled;
- std::fill(block_count, block_count + n_buckets, 0);
-
- for (long t = 0L; t < b->m_filled; ++t, --j) {
- unsigned char c = text_streamer->read();
-
- gt_out->write(i > whole_suffix_rank);
- bool next_gt = (gt_in.read());
-
- int delta = (i > whole_suffix_rank && c == 0);
- i = (block_offset_type)(count[c] + rank->rank((long)i, c) - delta);
- if (c == last && next_gt) ++i;
- temp[t] = i;
- block_count[i >> bucket_size_bits]++;
- }
-
- // Compute super-buckets.
- long ideal_sblock_size = (b->m_filled + n_increasers - 1) / n_increasers;
- long max_sbucket_size = 0;
- long bucket_id_beg = 0;
- for (long t = 0; t < n_increasers; ++t) {
- long bucket_id_end = bucket_id_beg, size = 0L;
- while (bucket_id_end < n_buckets && size < ideal_sblock_size)
- size += block_count[bucket_id_end++];
- b->sblock_size[t] = size;
- max_sbucket_size = std::min(max_sbucket_size, size);
- for (long id = bucket_id_beg; id < bucket_id_end; ++id)
- block_id_to_sblock_id[id] = t;
- bucket_id_beg = bucket_id_end;
- }
-
- if (max_sbucket_size < 4L * ideal_sblock_size) {
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++])
- b->sblock_beg[t] = ptr[t] = curbeg;
-
- // Permute the elements of the buffer.
- for (long t = 0; t < b->m_filled; ++t) {
- long id = (temp[t] >> bucket_size_bits);
- long sblock_id = block_id_to_sblock_id[id];
- oracle[t] = ptr[sblock_id]++;
- }
-
- for (long t = 0; t < b->m_filled; ++t) {
- long addr = oracle[t];
- b->m_content[addr] = temp[t];
- }
- } else {
- // Repeat the partition into sbuckets, this time using random sample.
- // This is a fallback mechanism in case the quick partition failed.
- // It is not suppose to happen to often.
-
- // Compute random sample of elements in the buffer.
- for (long t = 0; t < buffer_sample_size; ++t)
- samples[t] = temp[utils::random_long(0L, b->m_filled - 1)];
- std::sort(samples.begin(), samples.end());
- samples.erase(std::unique(samples.begin(), samples.end()), samples.end());
-
- // Compute bucket boundaries (lower bound is enough).
- std::fill(bucket_lbound, bucket_lbound + n_increasers + 1, gap_range_size);
-
- long step = (samples.size() + n_increasers - 1) / n_increasers;
- for (size_t t = 1, p = step; p < samples.size(); ++t, p += step)
- bucket_lbound[t] = (samples[p - 1] + samples[p] + 1) / 2;
- bucket_lbound[0] = 0;
-
- // Compute bucket sizes and sblock id into oracle array.
- std::fill(b->sblock_size, b->sblock_size + n_increasers, 0L);
- for (long t = 0; t < b->m_filled; ++t) {
- block_offset_type x = temp[t];
- int id = n_increasers;
- while (bucket_lbound[id] > x) --id;
- oracle[t] = id;
- b->sblock_size[id]++;
- }
-
- // Permute elements into their own buckets using oracle.
- for (long t = 0, curbeg = 0; t < n_increasers; curbeg += b->sblock_size[t++])
- b->sblock_beg[t] = ptr[t] = curbeg;
-
- for (long t = 0; t < b->m_filled; ++t) {
- long sblock_id = oracle[t];
- oracle[t] = ptr[sblock_id]++;
- }
-
- for (long t = 0; t < b->m_filled; ++t) {
- long addr = oracle[t];
- b->m_content[addr] = temp[t];
- }
- }
-
- // Add the buffer to the poll of full buffers and notify waiting thread.
- std::unique_lock<std::mutex> lk2(full_gap_buffers->m_mutex);
- full_gap_buffers->add(b);
- lk2.unlock();
- full_gap_buffers->m_cv.notify_one();
- }
-
- delete text_streamer;
- delete gt_out;
-
- // Report that another worker thread has finished.
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex);
- full_gap_buffers->increment_finished_workers();
- lk.unlock();
-
- // Notify waiting update threads in case no more buffers
- // are going to be produces by worker threads.
- full_gap_buffers->m_cv.notify_one();
-
- delete[] block_count;
- delete[] block_id_to_sblock_id;
- delete[] temp;
- delete[] oracle;
- delete[] ptr;
- delete[] bucket_lbound;
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_STREAM_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/stream_info.h b/exttools/pSAscan-0.1.0/src/psascan_src/stream_info.h
deleted file mode 100644
index 49624b6b..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/stream_info.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * @file src/psascan_src/stream_info.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_STREAM_INFO_H_INCLUDED
-#define __PSASCAN_SRC_STREAM_INFO_H_INCLUDED
-
-#include <mutex>
-#include <algorithm>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-
-//=============================================================================
-// Used to store progress information for different threads during streaming.
-//=============================================================================
-struct stream_info {
- stream_info(long thread_count, long tostream)
- : m_update_count(0L),
- m_thread_count(thread_count),
- m_tostream(tostream) {
- m_streamed = new long[thread_count];
- std::fill(m_streamed, m_streamed + thread_count, 0L);
-
- m_idle_update = new long double[thread_count];
- m_idle_work = new long double[thread_count];
- std::fill(m_idle_update, m_idle_update + thread_count, 0.L);
- std::fill(m_idle_work, m_idle_work + thread_count, 0.L);
-
- m_timestamp = utils::wclock();
- }
-
- ~stream_info() {
- delete[] m_streamed;
- delete[] m_idle_work;
- delete[] m_idle_update;
- }
-
- long m_update_count; // number of updates
- long m_thread_count; // number of threads
- long m_tostream; // total text length to stream
- long double m_timestamp; // when the streaming started
- long *m_streamed; // how many bytes streamed by each thread
- long double *m_idle_update;
- long double *m_idle_work;
-
- std::mutex m_mutex;
-};
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_STREAM_INFO_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/uint40.h b/exttools/pSAscan-0.1.0/src/psascan_src/uint40.h
deleted file mode 100644
index 938aca4a..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/uint40.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/******************************************************************************
- *
- * Class representing a 40-bit unsigned integer encoded in five bytes.
- *
- ******************************************************************************
- * Copyright (C) 2012 Timo Bingmann <tb@panthema.net>
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- *****************************************************************************
- *
- * NOTE: This is slightly modified version of the file used in eSAIS-0.5.4
- * (https://panthema.net/2012/1119-eSAIS-Inducing-Suffix-and-LCP-Arrays-
- * in-External-Memory/). In particular, it contains a small bugfix in the
- * += operator.
- *
- * Modified by Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- *****************************************************************************/
-
-
-#ifndef __UINT40_H_INCLUDED
-#define __UINT40_H_INCLUDED
-
-#include <inttypes.h>
-#include <stdint.h>
-#include <cassert>
-#include <iostream>
-#include <limits>
-#include <unistd.h>
-
-
-class uint40
-{
-private:
- uint32_t low;
- uint8_t high;
-
-public:
- inline uint40()
- {
- }
-
- inline uint40(uint32_t l, uint8_t h)
- : low(l), high(h)
- {
- }
-
- inline uint40(const uint40& a)
- : low(a.low), high(a.high)
- {
- }
-
- inline uint40(const int& a)
- : low(a), high(0)
- {
- }
-
- inline uint40(const unsigned int& a)
- : low(a), high(0)
- {
- }
-
- inline uint40(const uint64_t& a)
- : low(a & 0xFFFFFFFF), high((a >> 32) & 0xFF)
- {
- assert( a <= 0xFFFFFFFFFFLU );
- }
-
- inline uint40(const long& a)
- : low(a & 0xFFFFFFFFL), high((a >> 32) & 0xFF) {
- assert( a <= 0xFFFFFFFFFFL );
- }
-
- inline uint64_t ull() const {
- return ((uint64_t)high) << 32 | (uint64_t)low;
- }
-
- inline long ll() const
- {
- return (long)ull();
- }
-
- inline operator uint64_t() const
- {
- return ull();
- }
-
- inline uint64_t u64() const
- {
- return ((uint64_t)high) << 32 | (uint64_t)low;
- }
-
- inline uint40& operator++ ()
- {
- if (low == std::numeric_limits<uint32_t>::max())
- ++high, low = 0;
- else
- ++low;
- return *this;
- }
-
- inline uint40& operator-- ()
- {
- if (low == 0)
- --high, low = std::numeric_limits<uint32_t>::max();
- else
- --low;
- return *this;
- }
-
- inline uint40& operator+= (const uint40& b)
- {
- uint64_t add = (uint64_t)low + b.low; // BUGFIX
- low = add & 0xFFFFFFFF;
- high += b.high + ((add >> 32) & 0xFF);
- return *this;
- }
-
- inline bool operator== (const uint40& b) const
- {
- return (low == b.low) && (high == b.high);
- }
-
- inline bool operator!= (const uint40& b) const
- {
- return (low != b.low) || (high != b.high);
- }
-
- inline bool operator< (const uint40& b) const
- {
- return (high < b.high) || (high == b.high && low < b.low);
- }
-
- inline bool operator<= (const uint40& b) const
- {
- return (high < b.high) || (high == b.high && low <= b.low);
- }
-
- inline bool operator> (const uint40& b) const
- {
- return (high > b.high) || (high == b.high && low > b.low);
- }
-
- inline bool operator>= (const uint40& b) const
- {
- return (high > b.high) || (high == b.high && low >= b.low);
- }
-
- friend std::ostream& operator<< (std::ostream& os, const uint40& a)
- {
- return os << a.ull();
- }
-
-} __attribute__((packed));
-
-namespace std {
-
-template<>
-class numeric_limits<uint40> {
-public:
- static uint40 min() { return uint40(std::numeric_limits<uint32_t>::min(),
- std::numeric_limits<uint8_t>::min()); }
-
- static uint40 max() { return uint40(std::numeric_limits<uint32_t>::max(),
- std::numeric_limits<uint8_t>::max()); }
-};
-
-}
-
-#endif // __UINT40_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/update.h b/exttools/pSAscan-0.1.0/src/psascan_src/update.h
deleted file mode 100644
index 4e757943..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/update.h
+++ /dev/null
@@ -1,226 +0,0 @@
-/**
- * @file src/psascan_src/update.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_UPDATE_H_INCLUDED
-#define __PSASCAN_SRC_UPDATE_H_INCLUDED
-
-#include <thread>
-#include <mutex>
-#include <condition_variable>
-#include <algorithm>
-
-#include "utils.h"
-#include "gap_buffer.h"
-#include "gap_array.h"
-#include "stream_info.h"
-
-
-namespace psascan_private {
-
-//==============================================================================
-// This object creates a given number of threads that will perform gap array
-// updates. Most of the time all threads are sleeping on a conditional variable.
-// Once the gap buffer is available for processing, they are all woken up and
-// perform the update in parallel. The caller then waits until all threads are
-// finished and then puts the gap buffer in the poll of empty buffers.
-//
-// Only one object of this class should exist.
-//==============================================================================
-template<typename block_offset_type>
-struct gap_parallel_updater {
-
- template<typename T>
- static void parallel_update(gap_parallel_updater<T> *updater, int id) {
- while (true) {
- // Wait until there is a gap buffer available or the
- // message 'no more buffers' arrives.
- std::unique_lock<std::mutex> lk(updater->m_avail_mutex);
- while (!(updater->m_avail[id]) && !(updater->m_avail_no_more))
- updater->m_avail_cv.wait(lk);
-
- if (!(updater->m_avail[id]) && updater->m_avail_no_more) {
- // No more buffers -- exit.
- lk.unlock();
- return;
- }
-
- updater->m_avail[id] = false;
- lk.unlock();
-
- // Safely perform the update.
- gap_buffer<T> *buf = updater->m_buffer;
- buffered_gap_array *gap = updater->m_gap_array;
- int beg = buf->sblock_beg[id];
- int end = beg + buf->sblock_size[id];
-
- for (int i = beg; i < end; ++i) {
- T x = buf->m_content[i];
- gap->m_count[x]++;
-
- // Check if values wrapped-around.
- if (gap->m_count[x] == 0L) {
- gap->m_excess_mutex.lock();
- gap->add_excess(x);
- gap->m_excess_mutex.unlock();
- }
- }
-
- // Update the number of finished threads.
- bool finished_last = false;
- std::unique_lock<std::mutex> lk2(updater->m_finished_mutex);
- updater->m_finished++;
- if (updater->m_finished == updater->m_threads_cnt)
- finished_last = true;
- lk2.unlock();
-
- // If this was the last thread finishing, let the caller know.
- if (finished_last)
- updater->m_finished_cv.notify_one();
- }
- }
-
- gap_parallel_updater(buffered_gap_array *gap_array, int threads_cnt)
- : m_gap_array(gap_array),
- m_threads_cnt(threads_cnt),
- m_avail_no_more(false) {
- m_avail = new bool[m_threads_cnt];
- std::fill(m_avail, m_avail + m_threads_cnt, false);
- m_threads = new std::thread*[m_threads_cnt];
-
- // After this, threads immediately hang up on m_avail_cv.
- for (int i = 0; i < m_threads_cnt; ++i)
- m_threads[i] = new std::thread(parallel_update<block_offset_type>, this, i);
- }
-
- ~gap_parallel_updater() {
- // Signal all threads to finish.
- std::unique_lock<std::mutex> lk(m_avail_mutex);
- m_avail_no_more = true;
- lk.unlock();
- m_avail_cv.notify_all();
-
- // Wait until all threads finish and release memory.
- for (int i = 0; i < m_threads_cnt; ++i) {
- m_threads[i]->join();
- delete m_threads[i];
- }
- delete[] m_threads;
- delete[] m_avail;
- }
-
- void update(gap_buffer<block_offset_type> *buffer) {
- // Prepare a message for each thread that new buffer is available.
- std::unique_lock<std::mutex> lk(m_avail_mutex);
- m_finished = 0;
- m_buffer = buffer;
- for (int i = 0; i < m_threads_cnt; ++i)
- m_avail[i] = true;
- lk.unlock();
-
- // Wake up all threads to perform the update.
- m_avail_cv.notify_all();
-
- // Wait until all threads report that they are done.
- std::unique_lock<std::mutex> lk2(m_finished_mutex);
- while (m_finished != m_threads_cnt)
- m_finished_cv.wait(lk2);
- lk2.unlock();
-
- // We are done processing the buffer. The caller of this method
- // can now place the buffer into the poll of empty buffers.
- }
-
-private:
- buffered_gap_array *m_gap_array;
-
- std::thread **m_threads;
- int m_threads_cnt;
-
- gap_buffer<block_offset_type> *m_buffer;
-
- // For notifying threads about available buffer.
- std::mutex m_avail_mutex;
- std::condition_variable m_avail_cv;
- bool *m_avail;
- bool m_avail_no_more;
-
- // The mutex below is to protect m_finished. The condition
- // variable allows the caller to wait (and to be notified when done)
- // until threads complete processing their section of the buffer.
- int m_finished;
- std::mutex m_finished_mutex;
- std::condition_variable m_finished_cv;
-};
-
-template<typename block_offset_type>
-void gap_updater(gap_buffer_poll<block_offset_type> *full_gap_buffers,
- gap_buffer_poll<block_offset_type> *empty_gap_buffers,
- buffered_gap_array *gap, long n_increasers) {
-
- gap_parallel_updater<block_offset_type> *updater =
- new gap_parallel_updater<block_offset_type>(gap, n_increasers);
-
- while (true) {
- // Get a buffer from the poll of full buffers.
- std::unique_lock<std::mutex> lk(full_gap_buffers->m_mutex);
- while (!full_gap_buffers->available() && !full_gap_buffers->finished())
- full_gap_buffers->m_cv.wait(lk);
-
- if (!full_gap_buffers->available() && full_gap_buffers->finished()) {
- // There will be no more full buffers -- exit.
- lk.unlock();
- break;
- }
-
- gap_buffer<block_offset_type> *b = full_gap_buffers->get();
- lk.unlock();
-
- // Process buffer.
- updater->update(b);
-
- // Add the buffer to the poll of empty buffers and notify
- // the waiting thread.
- std::unique_lock<std::mutex> lk2(empty_gap_buffers->m_mutex);
- empty_gap_buffers->add(b);
- lk2.unlock();
- empty_gap_buffers->m_cv.notify_one();
- }
-
- delete updater;
-}
-
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_UPDATE_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/utils.cpp b/exttools/pSAscan-0.1.0/src/psascan_src/utils.cpp
deleted file mode 100644
index a6eb7f08..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/utils.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/**
- * @file src/psascan_src/utils.cpp
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <errno.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <string>
-#include <fstream>
-#include <algorithm>
-
-#include "utils.h"
-
-
-namespace psascan_private {
-namespace utils {
-
-long double wclock() {
- timeval tim;
- gettimeofday(&tim, NULL);
-
- return tim.tv_sec + (tim.tv_usec / 1000000.0L);
-}
-
-std::FILE *open_file(std::string fname, std::string mode) {
- std::FILE *f = std::fopen(fname.c_str(), mode.c_str());
- if (f == NULL) {
- std::perror(fname.c_str());
- std::exit(EXIT_FAILURE);
- }
-
- return f;
-}
-
-long file_size(std::string fname) {
- std::FILE *f = open_file(fname, "rt");
- std::fseek(f, 0L, SEEK_END);
- long size = std::ftell(f);
- std::fclose(f);
-
- return size;
-}
-
-bool file_exists(std::string fname) {
- std::FILE *f = std::fopen(fname.c_str(), "r");
- bool ret = (f != NULL);
- if (f != NULL)
- std::fclose(f);
-
- return ret;
-}
-
-void file_delete(std::string fname) {
- int res = std::remove(fname.c_str());
- if (res) {
- fprintf(stderr, "Failed to delete %s: %s\n",
- fname.c_str(), strerror(errno));
- std::exit(EXIT_FAILURE);
- }
-}
-
-std::string absolute_path(std::string fname) {
- char path[1 << 12];
- bool created = false;
-
- if (!file_exists(fname)) {
- // We need to create the file, since realpath fails on non-existing files.
- std::fclose(open_file(fname, "w"));
- created = true;
- }
- if (!realpath(fname.c_str(), path)) {
- fprintf(stderr, "\nError: realpath failed for %s\n", fname.c_str());
- std::exit(EXIT_FAILURE);
- }
-
- if (created)
- file_delete(fname);
-
- return std::string(path);
-}
-
-void read_block(std::FILE *f, long beg, long length, unsigned char *b) {
- std::fseek(f, beg, SEEK_SET);
- read_n_objects_from_file<unsigned char>(b, length, f);
-}
-
-void read_block(std::string fname, long beg, long length, unsigned char *b) {
- std::FILE *f = open_file(fname.c_str(), "r");
- read_block(f, beg, length, b);
- std::fclose(f);
-}
-
-int random_int(int p, int r) {
- return p + rand() % (r - p + 1);
-}
-
-long random_long(long p, long r) {
- long x = random_int(0, 1000000000);
- long y = random_int(0, 1000000000);
- long z = x * 1000000000L + y;
- return p + z % (r - p + 1);
-}
-
-void fill_random_string(unsigned char* &s, long length, int sigma) {
- for (long i = 0; i < length; ++i)
- s[i] = random_int(0, sigma - 1);
-}
-
-void fill_random_letters(unsigned char* &s, long n, int sigma) {
- fill_random_string(s, n, sigma);
- for (long i = 0; i < n; ++i) s[i] += 'a';
-}
-
-std::string random_string_hash() {
- uint64_t hash = (uint64_t)rand() * RAND_MAX + rand();
- std::stringstream ss;
- ss << hash;
- return ss.str();
-}
-
-long log2ceil(long x) {
- long pow2 = 1, w = 0;
- while (pow2 < x) { pow2 <<= 1; ++w; }
- return w;
-}
-
-long log2floor(long x) {
- long pow2 = 1, w = 0;
- while ((pow2 << 1) <= x) { pow2 <<= 1; ++w; }
- return w;
-}
-
-} // namespace utils
-} // namespace psascan_private
diff --git a/exttools/pSAscan-0.1.0/src/psascan_src/utils.h b/exttools/pSAscan-0.1.0/src/psascan_src/utils.h
deleted file mode 100644
index e5f3522f..00000000
--- a/exttools/pSAscan-0.1.0/src/psascan_src/utils.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * @file src/psascan_src/utils.h
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#ifndef __PSASCAN_SRC_PSASCAN_UTILS_H_INCLUDED
-#define __PSASCAN_SRC_PSASCAN_UTILS_H_INCLUDED
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include <sstream>
-
-
-namespace psascan_private {
-namespace utils {
-
-#define STRX(x) #x
-#define STR(x) STRX(x)
-
-// Time
-long double wclock();
-
-// Basic file handling
-std::FILE *open_file(std::string fname, std::string mode);
-long file_size(std::string fname);
-bool file_exists(std::string fname);
-void file_delete(std::string fname);
-std::string absolute_path(std::string fname);
-
-// File I/O
-void read_block(std::string fname, long beg, long length, unsigned char *b);
-void read_block(std::FILE *f, long beg, long length, unsigned char *b);
-
-template<typename value_type>
-void write_objects_to_file(const value_type *tab, long length, std::string fname) {
- std::FILE *f = open_file(fname, "w");
- size_t fwrite_ret = std::fwrite(tab, sizeof(value_type), length, f);
- if ((long)fwrite_ret != length) {
- fprintf(stderr, "\nError: fwrite in line %s of %s returned %ld\n",
- STR(__LINE__), STR(__FILE__), fwrite_ret);
- std::exit(EXIT_FAILURE);
- }
-
- std::fclose(f);
-}
-
-template<typename value_type>
-void add_objects_to_file(const value_type *tab, long length, std::FILE *f) {
- size_t fwrite_ret = std::fwrite(tab, sizeof(value_type), length, f);
- if ((long)fwrite_ret != length) {
- fprintf(stderr, "\nError: fwrite in line %s of %s returned %lu\n",
- STR(__LINE__), STR(__FILE__), fwrite_ret);
- std::exit(EXIT_FAILURE);
- }
-}
-
-template<typename value_type>
-void add_objects_to_file(const value_type *tab, long length, std::string fname) {
- std::FILE *f = utils::open_file(fname.c_str(), "a");
- add_objects_to_file<value_type>(tab, length, f);
- std::fclose(f);
-}
-
-template<typename value_type>
-void read_n_objects_from_file(value_type* tab, long length, std::FILE *f) {
- size_t fread_ret = std::fread(tab, sizeof(value_type), length, f);
- if ((long)fread_ret != length) {
- fprintf(stderr, "\nError: fread in line %s of %s returned %ld\n",
- STR(__LINE__), STR(__FILE__), fread_ret);
- std::exit(EXIT_FAILURE);
- }
-}
-
-template<typename value_type>
-void read_n_objects_from_file(value_type* tab, long length, std::string fname) {
- std::FILE *f = open_file(fname, "r");
- read_n_objects_from_file<value_type>(tab, length, f);
- std::fclose(f);
-}
-
-template<typename value_type>
-void read_objects_from_file(value_type* &tab, long &length, std::string fname) {
- std::FILE *f = open_file(fname, "r");
- std::fseek(f, 0L, SEEK_END);
- length = (long)(std::ftell(f) / sizeof(value_type));
- std::rewind(f);
- tab = (value_type *)malloc(length * sizeof(value_type));
- read_n_objects_from_file<value_type>(tab, length, f);
- std::fclose(f);
-}
-
-// Randomness
-int random_int(int p, int r);
-long random_long(long p, long r);
-void fill_random_string(unsigned char* &s, long length, int sigma);
-void fill_random_letters(unsigned char* &s, long n, int sigma);
-std::string random_string_hash();
-
-// Math
-long log2ceil(long x);
-long log2floor(long x);
-
-// Misc
-template<typename int_type>
-std::string intToStr(int_type x) {
- std::stringstream ss;
- ss << x;
- return ss.str();
-}
-
-} // namespace utils
-} // namespace psascan_private
-
-#endif // __PSASCAN_SRC_PSASCAN_UTILS_H_INCLUDED
diff --git a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/Makefile b/exttools/pSAscan-0.1.0/tools/delete-bytes-255/Makefile
deleted file mode 100644
index f6c7269c..00000000
--- a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-SHELL = /bin/sh
-CC = g++
-CFLAGS = -Wall -Wextra -pedantic -Wshadow -funroll-loops -DNDEBUG -O3 -march=native
-
-all: delete255
-
-delete255:
- $(CC) $(CFLAGS) -o delete255 main.cpp
-
-clean:
- /bin/rm -f delete255 *.o
diff --git a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/main.cpp b/exttools/pSAscan-0.1.0/tools/delete-bytes-255/main.cpp
deleted file mode 100644
index 3d6366d8..00000000
--- a/exttools/pSAscan-0.1.0/tools/delete-bytes-255/main.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * @file tools/delete-bytes-255/main.cpp
- * @author Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * @section LICENCE
- *
- * This file is part of pSAscan v0.1.0
- * See: http://www.cs.helsinki.fi/group/pads/
- *
- * Copyright (C) 2014-2015
- * Juha Karkkainen <juha.karkkainen (at) cs.helsinki.fi>
- * Dominik Kempa <dominik.kempa (at) gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- **/
-
-#include <cstdio>
-#include <cstdlib>
-#include <sys/time.h>
-
-
-long double wallclock() {
- timeval tim;
- gettimeofday(&tim, NULL);
- return tim.tv_sec + (tim.tv_usec / 1000000.L);
-}
-
-int main(int argc, char **argv) {
- if (argc != 2) {
- std::fprintf(stderr, "Usage: %s FILE\nErase all bytes with value 255 "
- "from FILE. Write result on standard output.\n", argv[0]);
- std::exit(EXIT_FAILURE);
- }
-
- // Open the input file.
- std::FILE *f = std::fopen(argv[1], "r");
- if (f == NULL) {
- std::perror(argv[1]);
- std::exit(EXIT_FAILURE);
- }
-
- // Get the file size.
- std::fseek(f, 0L, SEEK_END);
- long size = std::ftell(f);
- std::rewind(f);
-
- // Allocate the buffer.
- static const long bufsize = (2L << 20);
- unsigned char *buffer = new unsigned char[bufsize];
-
- // Do the filtering.
- long double start = wallclock();
- std::size_t elems, count = 0, total = 0;
- while ((elems = std::fread(buffer, 1, bufsize, f)) > 0) {
- total += elems;
- count += elems;
-
- // Filter the buffer.
- std::size_t ptr = 0;
- for (std::size_t j = 0; j < elems; ++j)
- if (buffer[j] != 255)
- buffer[ptr++] = buffer[j];
-
- // Write filtered buffer to stdout.
- if (ptr > 0)
- std::fwrite(buffer, 1, ptr, stdout);
-
- // Print progress message.
- if (count > (64L << 20)) {
- count = 0;
- long double elapsed = wallclock() - start;
- long double mib = (long double)total / (1L << 20);
- std::fprintf(stderr, "Processed %.0LfMiB (%.1Lf%%). Speed: %.2LfMiB/s.\r",
- mib, (100.L * total) / size, mib / elapsed);
- }
- }
-
- // Clean up.
- delete[] buffer;
- std::fclose(f);
-
- // Print summary.
- long double elapsed = wallclock() - start;
- long double mib = (long double)size / (1L << 20);
- std::fprintf(stderr, "Processed %.0LfMiB (100.0%%). Speed: %.2LfMiB/s.\n",
- mib, mib / elapsed);
-}
diff --git a/exttools/tools/CMakeLists.txt b/exttools/tools/CMakeLists.txt
deleted file mode 100644
index 45d70375..00000000
--- a/exttools/tools/CMakeLists.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-############################################################################
-# CMakeLists.txt
-#
-# Part of a simple STXXL example. See http://stxxl.sourceforge.net
-#
-# Copyright (C) 2013 Timo Bingmann <tb@panthema.net>
-#
-# Distributed under the Boost Software License, Version 1.0.
-# (See accompanying file LICENSE_1_0.txt or copy at
-# http://www.boost.org/LICENSE_1_0.txt)
-############################################################################
-
-# require cmake 2.6.4 (but please use 2.8.x)
-cmake_minimum_required(VERSION 2.6.4)
-
-# set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} /home/niki/opt)
-# set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} /home/niki/opt/lib/cmake/stxxl)
-# set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /home/niki/opt/lib)
-# set(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} /home/niki/opt/include)
-# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/home/niki/opt/include" )
-# SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -L/home/USERNAME/opt/lib" )
-
-# we first give our project a name
-project(myproject)
-set(CXX_STANDARD c++11)
-
-# prohibit in-source builds
-if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
- message(SEND_ERROR "In-source builds are not allowed.")
-endif("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
-
-
-# search for stxxl-config.cmake which contains the library's configuration
-find_package(STXXL REQUIRED)
-
-# print some info (this can be removed)
-message(STATUS "STXXL_CXX_FLAGS: ${STXXL_CXX_FLAGS}")
-message(STATUS "STXXL_INCLUDE_DIRS: ${STXXL_INCLUDE_DIRS}")
-message(STATUS "STXXL_LIBRARIES: ${STXXL_LIBRARIES}")
-
-# apply CXXFLAGS to our configuration
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${STXXL_CXX_FLAGS}")
-
-# enable warnings (always good)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -W -Wall -O0 -ggdb")
-
-# add STXXL include directory
-include_directories(${STXXL_INCLUDE_DIRS})
-
-# create and executable and linke with STXXL
-add_executable(isaandbwt isaandbwt.cpp)
-target_link_libraries(isaandbwt ${STXXL_LIBRARIES})
-
-add_executable(readplcp readplcp.cpp)
-target_link_libraries(readplcp ${STXXL_LIBRARIES} /home/niki/opt/lib/libsdsl.a)
-
-add_executable(standardize standardize.cpp)
-target_link_libraries(standardize ${STXXL_LIBRARIES})
diff --git a/exttools/tools/isaandbwt.cpp b/exttools/tools/isaandbwt.cpp
deleted file mode 100644
index 1b22a327..00000000
--- a/exttools/tools/isaandbwt.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-/***************************************************************************
- * test1.cpp
- *
- * Part of a simple STXXL example. See http://stxxl.sourceforge.net
- *
- * Copyright (C) 2013 Timo Bingmann <tb@panthema.net>
- *
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- **************************************************************************/
-
-#include <iostream>
-#include <limits>
-
-#include <stxxl/vector>
-#include <stxxl/random>
-#include <stxxl/sort>
-#include <stxxl/bits/algo/ksort.h>
-#include "/scripts/code/dcheck.hpp"
-// struct my_less_int : std::less<int>
-// {
-// int min_value() const { return std::numeric_limits<int>::min(); };
-// int max_value() const { return std::numeric_limits<int>::max(); };
-// };
-//
-// int main(int argv,)
-// {
-// // create vector
-// stxxl::VECTOR_GENERATOR<int>::result vector;
-//
-// // fill vector with random integers
-// stxxl::random_number32 random;
-//
-// for (size_t i = 0; i < 100*1024*1024; ++i) {
-// vector.push_back(random());
-// }
-//
-// // sort vector using 16 MiB RAM
-// stxxl::sort(vector.begin(), vector.end(), my_less_int(), 16*1024*1024);
-//
-// // output first and last items:
-// std::cout << vector.size() << " items sorted ranging from "
-// << vector.front() << " to " << vector.back() << std::endl;
-//
-// return 0;
-// }
-
-
-
-#include <iostream>
-#include <fstream>
-#include <stxxl/bits/common/uint_types.h>
-
-size_t filesize( const char*const filepath ){
- std::ifstream file(filepath, std::ios::binary | std::ios::ate | std::ios::in);
- if(!file.good()) return 0;
- return file.tellg();
-}
-bool file_exists(const char *const filepath) {
- std::ifstream infile(filepath);
- return infile.good();
-}
-
-
-template<class int_t>
-class IntegerFileForwardIterator {
- const size_t m_size;
- std::ifstream m_is;
- size_t m_index;
- char m_buf[sizeof(int_t)];
- public:
-
- IntegerFileForwardIterator(const char*const filename)
- : m_size { filesize(filename) }
- , m_is {filename, std::ios::binary | std::ios::in }
- , m_index {0}
- {}
-
- size_t size() const { return m_size/sizeof(int_t); }
- size_t index() const { return m_index; }
- int_t operator*() { return *reinterpret_cast<int_t*>(m_buf); }
- IntegerFileForwardIterator& operator++(int) {
- m_is.read(m_buf, sizeof(int_t));
- ++m_index;
- return *this;
- }
-};
-
-template<class int_t>
-class IntegerFileArray {
- const size_t m_size;
- std::ifstream m_is;
- public:
- IntegerFileArray(const char*const filename)
- : m_size { filesize(filename) }
- , m_is {filename, std::ios::binary | std::ios::in }
- {}
- int_t operator[](size_t i) {
-// DCHECK_LT(i, size());
- m_is.seekg(i*sizeof(int_t), std::ios_base::beg);
- char buf[sizeof(int_t)];
- m_is.read(buf, sizeof(int_t));
- return *reinterpret_cast<int_t*>(buf);
- }
- size_t size() const { return m_size/sizeof(int_t); }
-};
-
-
-using namespace stxxl;
-void bwt() {
- IntegerFileForwardIterator<uint40> sa { "/bighome/workspace/eSAIS/build/src/a.sa5" };
- std::ifstream is("/bighome/workspace/eSAIS/build/src/a", std::ios::binary | std::ios::in);
- while(is) {
-
- }
-
-}
-
- // struct KeyExtractor {
- // typedef uint40 key_type;
- // typedef std::pair<uint40,uint40> value_type;
- // key_type m_key;
- //
- // KeyExtractor() {}
- // KeyExtractor(const key_type& k) : m_key(k) {}
- // key_type operator()(const value_type& v) const { return v.first; }
- // value_type min_value() const { return value_type(0,0); }
- // //value_type max_value() const { return std::make_pair(std::numeric_limits<key_type>::max(),0); }
- // value_type max_value() const { return value_type(m_key,0); }
- // };
-
-template<class pair_t>
- struct KeyExtractor {
- typedef pair_t value_type;
- typedef typename pair_t::first_type key_type;
- key_type m_key;
-
- KeyExtractor() {}
- KeyExtractor(const key_type& k) : m_key(k) {}
- key_type operator()(const value_type& v) const { return v.first; }
- value_type min_value() const { return pair_t(0, (typename value_type::second_type)0); }
- //value_type max_value() const { return std::make_pair(std::numeric_limits<key_type>::max(),0); }
- value_type max_value() const { return pair_t(m_key, (typename value_type::second_type)0); }
- };
-
-using namespace std;
-int main(int argc, char** argv) {
- if(argc != 2) {
- cout << "Usage: " << argv[0] << " text-file" << std::endl;
- return 1;
- }
- const std::string textfilename = argv[1];
- const std::string safilename = textfilename + ".sa5";
- const std::string isafilename = textfilename + ".isa5";
- const std::string bwtfilename = textfilename + ".bwt";
- if(!file_exists(textfilename.c_str())) {
- cout << "Could not open text file " << textfilename << std::endl;
- return 1;
- }
- if(!file_exists(safilename.c_str())) {
- cout << "Could not open SA file " << safilename << std::endl;
- return 1;
- }
- stxxl::VECTOR_GENERATOR<std::pair<uint40,uint40>>::result isa; // (text_position, factor_length)
- IntegerFileForwardIterator<uint40> safile { safilename.c_str() };
- while(safile.index() < safile.size()) {
- uint40 index = static_cast<uint64>(safile.index());
- isa.push_back(std::make_pair(*safile++,index));
- }
- stxxl::ksort(isa.begin(), isa.end(), KeyExtractor<std::pair<uint40,uint40>>(isa.size()),512*1024*1024); //, STXXL_DEFAULT_ALLOC_STRATEGY());
- std::ofstream isa_out(isafilename, std::ios::binary);
- for(auto it = isa.begin(); it != isa.end(); ++it) {
- isa_out.write((char*)(&it->second), sizeof(uint40));
- }
- isa_out.close();
-
- stxxl::VECTOR_GENERATOR<std::pair<uint40,char>>::result bwt;
- ifstream textfile(textfilename, ios::in | ios::binary);
- const uint40 isa_zero = isa.begin()->second;
- {
- auto it = isa.begin();
- ++it;
- for(; it != isa.end(); ++it) {
- bwt.push_back(std::make_pair(it->second+1, textfile.get()));
- DCHECK(textfile.good());
- }
- DCHECK(textfile.good());
- bwt.push_back(std::make_pair(isa_zero,textfile.get()));
- DCHECK(textfile.good());
- }
- stxxl::ksort(bwt.begin(), bwt.end(), KeyExtractor<std::pair<uint40,char>>(isa.size()),512*1024*1024); //, STXXL_DEFAULT_ALLOC_STRATEGY());
- std::ofstream bwt_out(bwtfilename, std::ios::binary);
- for(auto it = bwt.begin(); it != bwt.end(); ++it) {
- if(it->second == 0) bwt_out.put(1); // TODO BUG: prevent writing the 0-byte by writing 1
- else bwt_out.put(it->second);
- }
-
-
- // stxxl::VECTOR_GENERATOR<std::pair<uint40,uint40>>::result bwt; // (text_position, factor_length)
- // ifstream textfile(textfilename, ios::in | ios::binary);
-
- //
- // {
- // IntegerFileForwardIterator<uint40> sa { "/bighome/workspace/eSAIS/build/src/a.sa5" };
- // IntegerFileForwardIterator<uint40> isa { "/bighome/workspace/eSAIS/build/src/a.isa5" };
- // IntegerFileForwardIterator<uint40> plcp { "/bighome/workspace/eSAIS/build/src/a.plcp5" };
- // while(sa.index() < sa.size()) {
- // std::cout << *sa++ << "," << *isa++ << "," << *plcp++ << endl;
- // }
- // }
- // std::cout << endl;
- // {
- // IntegerFileArray<uint40> sa { "/bighome/workspace/eSAIS/build/src/a.sa5" };
- // IntegerFileArray<uint40> isa { "/bighome/workspace/eSAIS/build/src/a.isa5" };
- // IntegerFileArray<uint40> plcp { "/bighome/workspace/eSAIS/build/src/a.plcp5" };
- // for(size_t i = 0; i < sa.size(); ++i) {
- // std::cout << sa[i] << "," << isa[i] << "," << plcp[i] << endl;
- // }
- // }
-}
diff --git a/exttools/tools/readplcp.cpp b/exttools/tools/readplcp.cpp
deleted file mode 100644
index c58a5ca4..00000000
--- a/exttools/tools/readplcp.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-/***************************************************************************
- * test1.cpp
- *
- * Part of a simple STXXL example. See http://stxxl.sourceforge.net
- *
- * Copyright (C) 2013 Timo Bingmann <tb@panthema.net>
- *
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- **************************************************************************/
-
-#include <iostream>
-#include <limits>
-
-#include <stxxl/vector>
-#include <stxxl/random>
-#include <stxxl/sort>
-#include <stxxl/bits/algo/ksort.h>
-#include "/scripts/code/dcheck.hpp"
-
-#include <iostream>
-#include <fstream>
-#include <stxxl/bits/common/uint_types.h>
-#include "/home/niki/opt/include/sdsl/bits.hpp"
-
-size_t filesize( const char*const filepath ){
- std::ifstream file(filepath, std::ios::binary | std::ios::ate | std::ios::in);
- if(!file.good()) return 0;
- return file.tellg();
-}
-bool file_exists(const char *const filepath) {
- std::ifstream infile(filepath);
- return infile.good();
-}
-typedef size_t len_t;
-
-
-
-
-class PLCPFileForwardIterator {
- std::ifstream m_is;
-
- uint64_t m_chunk = 0; // current data chunk
- len_t m_idx = 0; // current select parameter
- len_t m_block = 0; // block index
- len_t m_blockrank = 0; //number of ones up to previous block
- uint_fast8_t m_ones; // number of ones in the current block `m_block`
-
- void read_chunk() {
- m_is.read(reinterpret_cast<char*>(&m_chunk), sizeof(decltype(m_chunk)));
- m_ones = sdsl::bits::cnt(m_chunk);
- }
-
- public:
- static constexpr const len_t eof = -1;
- PLCPFileForwardIterator(const char* filepath)
- : m_is(filepath)
- {
- read_chunk();
- }
-
- len_t index() const { return m_idx; }
- bool has_next() const {
- return m_is;
- }
-
- len_t next_select() {
- while(m_blockrank+m_ones < m_idx+1) {
- if(!m_is) {break;}
- ++m_block;
- m_blockrank += m_ones;
- read_chunk();
- }
- return 64*m_block + sdsl::bits::sel(m_chunk, m_idx+1-m_blockrank);
- }
- len_t operator()() {
- const len_t ret = next_select() - 2*m_idx;
- return ret;
- }
- void advance() {
- ++m_idx;
- }
-};
-
-template<class int_t>
-class IntegerFileForwardIterator {
- const size_t m_size;
- std::ifstream m_is;
- size_t m_index;
- char m_buf[sizeof(int_t)];
- public:
-
- IntegerFileForwardIterator(const char*const filename)
- : m_size { filesize(filename) }
- , m_is {filename, std::ios::binary | std::ios::in }
- , m_index {0}
- {}
-
- size_t size() const { return m_size/sizeof(int_t); }
- size_t index() const { return m_index; }
- int_t operator*() { return *reinterpret_cast<int_t*>(m_buf); }
- IntegerFileForwardIterator& operator++(int) {
- m_is.read(m_buf, sizeof(int_t));
- ++m_index;
- return *this;
- }
-};
-
-template<class int_t>
-class IntegerFileArray {
- const size_t m_size;
- std::ifstream m_is;
- public:
- IntegerFileArray(const char*const filename)
- : m_size { filesize(filename) }
- , m_is {filename, std::ios::binary | std::ios::in }
- {}
- int_t operator[](size_t i) {
-// DCHECK_LT(i, size());
- m_is.seekg(i*sizeof(int_t), std::ios_base::beg);
- char buf[sizeof(int_t)];
- m_is.read(buf, sizeof(int_t));
- return *reinterpret_cast<int_t*>(buf);
- }
- size_t size() const { return m_size/sizeof(int_t); }
-};
-
-
-using namespace std;
-int main(int argc, char** argv) {
- if(argc != 2) {
- cout << "Usage: " << argv[0] << " text-file" << std::endl;
- return 1;
- }
- const std::string textfilename = argv[1];
- const std::string plcpfilename = textfilename + ".plcp";
- if(!file_exists(plcpfilename.c_str())) {
- cout << "Could not open text file " << textfilename << std::endl;
- return 1;
- }
- PLCPFileForwardIterator p(plcpfilename.c_str());
- size_t i=0;
-// while(i < 100) {
- while(p.has_next()) {
- size_t entry = p();
-// if(!p.has_next()) break;
- std::cout << i++ << "->" << entry << "->" << p.has_next() << endl;
- p.advance();
- }
-
-
-}
diff --git a/exttools/tools/standardize.cpp b/exttools/tools/standardize.cpp
deleted file mode 100644
index 2148db85..00000000
--- a/exttools/tools/standardize.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <iostream>
-#include <fstream>
-using namespace std;
-
-size_t filesize( const char*const filepath ){
- std::ifstream file(filepath, std::ios::binary | std::ios::ate | std::ios::in);
- if(!file.good()) return 0;
- return file.tellg();
-}
-bool file_exists(const char *const filepath) {
- std::ifstream infile(filepath);
- return infile.good();
-}
-
-int main(int argc, char** argv) {
- if(argc != 2) {
- cout << "Usage: " << argv[0] << " text-file" << std::endl;
- return 1;
- }
- const std::string textfilename = argv[1];
- const std::string Ztextfilename = textfilename + ".0";
- if(!file_exists(textfilename.c_str())) {
- cout << "Could not open text file " << textfilename << std::endl;
- return 1;
- }
- std::ifstream is(textfilename, std::ios::binary);
- std::ofstream os(Ztextfilename, std::ios::binary);
- os << is.rdbuf();
- os.put(0);
-}
diff --git a/include/tudocomp/Coder.hpp b/include/tudocomp/Coder.hpp
index c66bf5f7..99ab41c4 100644
--- a/include/tudocomp/Coder.hpp
+++ b/include/tudocomp/Coder.hpp
@@ -26,7 +26,7 @@ public:
/// \param out The bit stream to write to.
/// \param literals The literal iterator.
template<typename literals_t>
- inline Encoder(Env&& env, std::shared_ptr<BitOStream> out, literals_t&& literals)
+ inline Encoder(Env&& env, std::shared_ptr<BitOStream> out, literals_t&&)
: Algorithm(std::move(env)), m_out(out) {
}
@@ -69,7 +69,7 @@ public:
/// \param v The value to encode.
/// \param r Unused.
template<typename value_t>
- inline void encode(value_t v, const BitRange& r) {
+ inline void encode(value_t v, const BitRange&) {
m_out->write_bit(v);
}
@@ -134,7 +134,7 @@ public:
/// \param r Unused.
/// \return The decoded bit value (zero or one).
template<typename value_t>
- inline value_t decode(const BitRange& r) {
+ inline value_t decode(const BitRange&) {
return value_t(m_in->read_bit());
}
diff --git a/include/tudocomp/Meta.hpp b/include/tudocomp/Meta.hpp
index ca1e75ef..4d926649 100644
--- a/include/tudocomp/Meta.hpp
+++ b/include/tudocomp/Meta.hpp
@@ -91,6 +91,8 @@ public:
/// \tparam T The Algorithm type.
template<class T>
inline void templated(const std::string& accepted_type) {
+ (void) accepted_type; // TODO: Actually use this parameter
+
m_meta.check_arg(m_argument_name);
Meta sub_meta = T::meta();
m_meta.m_sub_metas.push_back(sub_meta);
diff --git a/include/tudocomp/coders/ASCIICoder.hpp b/include/tudocomp/coders/ASCIICoder.hpp
index fc9b2866..d22bbafb 100644
--- a/include/tudocomp/coders/ASCIICoder.hpp
+++ b/include/tudocomp/coders/ASCIICoder.hpp
@@ -31,7 +31,7 @@ public:
using tdc::Encoder::Encoder;
template<typename value_t>
- inline void encode(value_t v, const Range& r) {
+ inline void encode(value_t v, const Range&) {
std::ostringstream s;
s << v;
for(uint8_t c : s.str()) m_out->write_int(c);
@@ -39,12 +39,12 @@ public:
}
template<typename value_t>
- inline void encode(value_t v, const LiteralRange& r) {
+ inline void encode(value_t v, const LiteralRange&) {
m_out->write_int(uint8_t(v));
}
template<typename value_t>
- inline void encode(value_t v, const BitRange& r) {
+ inline void encode(value_t v, const BitRange&) {
m_out->write_int(v ? '1' : '0');
}
};
@@ -55,7 +55,7 @@ public:
using tdc::Decoder::Decoder;
template<typename value_t>
- inline value_t decode(const Range& r) {
+ inline value_t decode(const Range&) {
std::ostringstream os;
for(uint8_t c = m_in->read_int<uint8_t>();
c >= '0' && c <= '9';
@@ -73,12 +73,12 @@ public:
}
template<typename value_t>
- inline value_t decode(const LiteralRange& r) {
+ inline value_t decode(const LiteralRange&) {
return value_t(m_in->read_int<uint8_t>());
}
template<typename value_t>
- inline value_t decode(const BitRange& r) {
+ inline value_t decode(const BitRange&) {
uint8_t b = m_in->read_int<uint8_t>();
return (b != '0');
}
diff --git a/include/tudocomp/coders/HuffmanCoder.hpp b/include/tudocomp/coders/HuffmanCoder.hpp
index 5538b13e..eddab809 100644
--- a/include/tudocomp/coders/HuffmanCoder.hpp
+++ b/include/tudocomp/coders/HuffmanCoder.hpp
@@ -38,7 +38,7 @@ namespace huff {
std::memset(C, 0, sizeof(len_t)*(ULITERAL_MAX+1));
while(input.has_next()) {
- literal_t c = input.next().c;
+ uliteral_t c = input.next().c;
DCHECK_LT(static_cast<uliteral_t>(c), ULITERAL_MAX+1);
DCHECK_LT(C[static_cast<uliteral_t>(c)], std::numeric_limits<len_t>::max());
++C[static_cast<uliteral_t>(c)];
@@ -325,7 +325,7 @@ namespace huff {
* Encodes a stream storing input_length characters
*/
inline void huffman_encode(
- std::basic_istream<literal_t>& input,
+ std::istream& input,
tdc::io::BitOStream& os,
const size_t input_length,
const uint8_t*const ordered_map_from_effective,
@@ -339,7 +339,7 @@ namespace huff {
{//now writing
os.write_compressed_int<size_t>(input_length);
- literal_t c;
+ char c;
while(input.get(c)) {
huffman_encode(c, os, ordered_codelengths, ordered_map_to_effective, alphabet_size, codewords);
}
@@ -369,7 +369,7 @@ namespace huff {
DVLOG(2) << "prefix_sum_lengths : " << arr_to_debug_string(prefix_sum_lengths, longest);
return prefix_sum_lengths;
}
- inline literal_t huffman_decode(
+ inline uliteral_t huffman_decode(
tdc::io::BitIStream& is,
const uliteral_t*const ordered_map_from_effective,
const size_t*const prefix_sum_lengths,
@@ -394,7 +394,7 @@ namespace huff {
inline void huffman_decode(
tdc::io::BitIStream& is,
- std::basic_ostream<literal_t>& output,
+ std::ostream& output,
const uliteral_t*const ordered_map_from_effective,
const uint8_t*const ordered_codelengths,
const size_t alphabet_size,
diff --git a/include/tudocomp/compressors/MTFCompressor.hpp b/include/tudocomp/compressors/MTFCompressor.hpp
index 2b5ebed5..f65967a6 100644
--- a/include/tudocomp/compressors/MTFCompressor.hpp
+++ b/include/tudocomp/compressors/MTFCompressor.hpp
@@ -42,7 +42,7 @@ value_type mtf_decode_char(const value_type v, value_type*const table) {
return return_value;
}
-template<class char_type = literal_t>
+template<class char_type = uliteral_t>
void mtf_encode(std::basic_istream<char_type>& is, std::basic_ostream<char_type>& os) {
typedef typename std::make_unsigned<char_type>::type value_type; // -> default: uint8_t
static constexpr size_t table_size = std::numeric_limits<value_type>::max()+1;
@@ -55,7 +55,7 @@ void mtf_encode(std::basic_istream<char_type>& is, std::basic_ostream<char_type>
}
}
-template<class char_type = literal_t>
+template<class char_type = uliteral_t>
void mtf_decode(std::basic_istream<char_type>& is, std::basic_ostream<char_type>& os) {
typedef typename std::make_unsigned<char_type>::type value_type; // -> default: uint8_t
static constexpr size_t table_size = std::numeric_limits<value_type>::max()+1;
@@ -66,7 +66,7 @@ void mtf_decode(std::basic_istream<char_type>& is, std::basic_ostream<char_type>
while(is.get(c)) {
os << mtf_decode_char(static_cast<value_type>(c), table);
}
-};
+}
class MTFCompressor : public Compressor {
public:
diff --git a/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp b/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp
index 17b177c7..fdaec2e9 100644
--- a/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp
+++ b/include/tudocomp/compressors/lcpcomp/compress/PLCPStrategy.hpp
@@ -25,8 +25,6 @@ size_t filesize( const char*const filepath ){
return file.tellg();
}
-#include <tudocomp_stat/StatPhase.hpp>
-
namespace tdc {
namespace lcpcomp {
@@ -242,7 +240,7 @@ class PLCPFileForwardIterator {
len_t index() const { return m_idx; }
bool has_next() const {
- return !m_is.fail();
+ return static_cast<bool>(m_is); // true while no read has failed; cast required because std::ifstream's operator bool is explicit since C++11
}
len_t next_select() {
@@ -268,6 +266,8 @@ class PLCPFileForwardIterator {
template<class RefStrategy,class plcp_type>
void compute_references(const size_t n, RefStrategy& refStrategy, plcp_type& pplcp, size_t threshold) {
+ env().end_stat_phase();
+ env().begin_stat_phase("Search Peaks");
struct Poi {
len_t pos;
@@ -386,7 +386,8 @@ class PLCPFileForwardIterator {
// DCHECK_EQ(plcp[lastpos], plcp_i);
lastpos_lcp = plcp_i;
}
- IF_STATS(StatPhase::log("max heap size", max_heap_size));
+ IF_STATS(env().log_stat("max heap size", max_heap_size));
+ env().end_stat_phase();
}
@@ -426,15 +427,13 @@ public:
// refStrategy.factorize(refs);
// env().end_stat_phase();
// }
-
- inline static ds::dsflags_t textds_flags() {
- return text_t::SA | text_t::ISA;
- }
- inline void factorize(text_t& text, size_t threshold, lzss::FactorBuffer& refs) {
- StatPhase phase("Load Index DS");
+ inline void factorize(text_t& text,
+ size_t threshold,
+ lzss::FactorBuffer& refs) {
+ env().begin_stat_phase("Load index ds");
// const std::string textfilename = "/bighome/workspace/compreSuite/tudocomp/datasets/abracadabra.0";
- const std::string textfilename = "/local1/cc_commoncrawl.ascii.10MB.0";
+ const std::string textfilename = "/bighome/workspace/compreSuite/tudocomp/datasets/cc_commoncrawl.ascii.10MB.0";
IntegerFileArray<uint_t<40>> sa ((textfilename + ".sa5").c_str());
IntegerFileArray<uint_t<40>> isa ((textfilename + ".isa5").c_str());
// SAFileArray<uint_t<40>> sa((textfilename + ".sa5").c_str());
@@ -442,177 +441,44 @@ public:
//IntegerFileForwardIterator<uint_t<40>> pplcp("/bighome/workspace/compreSuite/tudocomp/datasets/pc_english.200MB.plcp5");
DCHECK_EQ(sa.size(), text.size());
-IF_DEBUG(
- StatPhase::wrap("Check Index DS", [&]{
- const auto& tsa = text.require_sa();
- const auto& tisa = text.require_isa();
- const auto& plcp = text.require_plcp();
- PLCPFileForwardIterator pplcp ((textfilename + ".plcp").c_str());
- for(size_t i = 0; i < sa.size(); ++i) {
- DCHECK_EQ(sa.size(),tsa.size());
- DCHECK_EQ(sa[i], (uint64_t)tsa[i]);
- }
- DCHECK_EQ(isa.size(),tisa.size());
- for(size_t i = 0; i < isa.size(); ++i) {
- DCHECK_EQ(isa[i], (uint64_t)tisa[i]);
- }
- for(size_t i = 0; i < plcp.size()-1; ++i) {
- DCHECK_EQ(pplcp(),(uint64_t) plcp[i]);
- pplcp.advance();
- }
- });
- );
+//IF_DEBUG({
+ {
+ env().begin_stat_phase("Construct index ds");
+ text.require(text_t::SA | text_t::ISA | text_t::PLCP);
+
+ const auto& tsa = text.require_sa();
+ const auto& tisa = text.require_isa();
+ const auto& plcp = text.require_plcp();
+ PLCPFileForwardIterator pplcp ((textfilename + ".plcp").c_str());
+ for(size_t i = 0; i < sa.size(); ++i) {
+ DCHECK_EQ(sa.size(),tsa.size());
+ DCHECK_EQ(sa[i], (uint64_t)tsa[i]);
+ }
+ DCHECK_EQ(isa.size(),tisa.size());
+ for(size_t i = 0; i < isa.size(); ++i) {
+ DCHECK_EQ(isa[i], (uint64_t)tisa[i]);
+ }
+ for(size_t i = 0; i < plcp.size()-1; ++i) {
+ DCHECK_EQ(pplcp(),(uint64_t) plcp[i]);
+ pplcp.advance();
+ }
+ env().end_stat_phase();
+ env().begin_stat_phase("Check");
+
+
+ env().end_stat_phase();
+ }
+// })//DEBUG
PLCPFileForwardIterator pplcp ((textfilename + ".plcp").c_str());
RefDiskStrategy<decltype(sa),decltype(isa)> refStrategy(sa,isa);
- StatPhase::wrap("Search Peaks", [&]{
- compute_references(text.size()-1, refStrategy, pplcp, threshold);
- });
- StatPhase::wrap("Compute References", [&]{
- refStrategy.factorize(refs);
- });
+ compute_references(text.size()-1, refStrategy, pplcp, threshold);
+ env().begin_stat_phase("Compute References");
+ refStrategy.factorize(refs);
+ env().end_stat_phase();
- }
- //
- // inline void factorize(text_t& text,
- // size_t threshold,
- // lzss::FactorBuffer& factors) {
- //
- // // Construct SA, ISA and LCP
- // auto pplcp = StatPhase::wrap("Construct index ds", [&]{
- // text.require(text_t::SA | text_t::ISA);
- // const auto& sa = text.require_sa();
- // return LCPForwardIterator { (construct_plcp_bitvector(env(), sa, text)) };
- // });
- //
- // const auto& sa = text.require_sa();
- // const auto& isa = text.require_isa();
- // const len_t n = sa.size();
- //
- // StatPhase::wrap("Search Peaks", [&]{
- //
- // struct Poi {
- // len_t pos;
- // len_t lcp;
- // len_t no;
- // Poi(len_t _pos, len_t _lcp, len_t _no) : pos(_pos), lcp(_lcp), no(_no) {}
- // bool operator<(const Poi& o) const {
- // DCHECK_NE(o.pos, this->pos);
- // if(o.lcp == this->lcp) return this->pos > o.pos;
- // return this->lcp < o.lcp;
- // }
- // };
- //
- // boost::heap::pairing_heap<Poi> heap;
- // std::vector<boost::heap::pairing_heap<Poi>::handle_type> handles;
- //
- // IF_STATS(len_t max_heap_size = 0);
- //
- // // std::stack<poi> pois; // text positions of interest, i.e., starting positions of factors we want to replace
- //
- // len_t lastpos = 0;
- // len_t lastpos_lcp = 0;
- // for(len_t i = 0; i+1 < n; ++i) {
- // while(pplcp.index() < i) pplcp.advance();
- // const len_t plcp_i = pplcp(); DCHECK_EQ(pplcp.index(), i);
- // if(heap.empty()) {
- // if(plcp_i >= threshold) {
- // handles.emplace_back(heap.emplace(i, plcp_i, handles.size()));
- // lastpos = i;
- // lastpos_lcp = plcp_i;
- // }
- // continue;
- // }
- // if(i - lastpos >= lastpos_lcp || tdc_unlikely(i+1 == n)) {
- // IF_DEBUG(bool first = true);
- // IF_STATS(max_heap_size = std::max<len_t>(max_heap_size, heap.size()));
- // DCHECK_EQ(heap.size(), handles.size());
- // while(!heap.empty()) {
- // const Poi& top = heap.top();
- // const len_t source_position = sa[isa[top.pos]-1];
- // factors.emplace_back(top.pos, source_position, top.lcp);
- // const len_t next_pos = top.pos; // store top, this is the current position that gets factorized
- // IF_DEBUG(if(first) DCHECK_EQ(top.pos, lastpos); first = false;)
- //
- // {
- // len_t newlcp_peak = 0; // a new peak can emerge at top.pos+top.lcp
- // bool peak_exists = false;
- // if(top.pos+top.lcp < i)
- // for(len_t j = top.no+1; j < handles.size(); ++j) { // erase all right peaks that got substituted
- // if( handles[j].node_ == nullptr) continue;
- // const Poi poi = *(handles[j]);
- // DCHECK_LT(next_pos, poi.pos);
- // if(poi.pos < next_pos+top.lcp) {
- // heap.erase(handles[j]);
- // handles[j].node_ = nullptr;
- // if(poi.lcp + poi.pos > next_pos+top.lcp) {
- // const len_t remaining_lcp = poi.lcp+poi.pos - (next_pos+top.lcp);
- // DCHECK_NE(remaining_lcp,0);
- // if(newlcp_peak != 0) DCHECK_LE(remaining_lcp, newlcp_peak);
- // newlcp_peak = std::max(remaining_lcp, newlcp_peak);
- // }
- // } else if( poi.pos == next_pos+top.lcp) { peak_exists=true; }
- // else { break; } // only for performance
- // }
- // #ifdef DEBUG
- // if(peak_exists) { //TODO: DEBUG
- // for(len_t j = top.no+1; j < handles.size(); ++j) {
- // if( handles[j].node_ == nullptr) continue;
- // const Poi& poi = *(handles[j]);
- // if(poi.pos == next_pos+top.lcp) {
- // DCHECK_LE(newlcp_peak, poi.lcp);
- // break;
- // }
- // }
- // }
- // #endif
- // if(!peak_exists && newlcp_peak >= threshold) {
- // len_t j = top.no+1;
- // DCHECK(handles[j].node_ == nullptr);
- // handles[j] = heap.emplace(next_pos+top.lcp, newlcp_peak, j);
- // }
- //
- // }
- // handles[top.no].node_ = nullptr;
- // heap.pop(); // top now gets erased
- //
- // for(auto it = handles.rbegin(); it != handles.rend(); ++it) {
- // if( (*it).node_ == nullptr) continue;
- // Poi& poi = (*(*it));
- // if(poi.pos > next_pos) continue;
- // const len_t newlcp = next_pos - poi.pos;
- // if(newlcp < poi.lcp) {
- // if(newlcp < threshold) {
- // heap.erase(*it);
- // it->node_ = nullptr;
- // } else {
- // poi.lcp = newlcp;
- // heap.decrease(*it);
- //
- // }
- // } else {
- // break;
- // }
- // }
- // }
- // handles.clear();
- // --i;
- // continue;
- // }
- // DCHECK_EQ(pplcp.index(), i);
- // DCHECK_EQ(plcp_i, pplcp());
- // if(plcp_i <= lastpos_lcp) continue;
- // DCHECK_LE(threshold, plcp_i);
- // handles.emplace_back(heap.emplace(i,plcp_i, handles.size()));
- // lastpos = i;
- // // DCHECK_EQ(plcp[lastpos], plcp_i);
- // lastpos_lcp = plcp_i;
- // }
- // IF_STATS(StatPhase::log("max heap size", max_heap_size));
- //
- // });
- //}
+ }
};
diff --git a/include/tudocomp/compressors/lz78/CedarTrie.hpp b/include/tudocomp/compressors/lz78/CedarTrie.hpp
index 6f0d23f8..af175910 100644
--- a/include/tudocomp/compressors/lz78/CedarTrie.hpp
+++ b/include/tudocomp/compressors/lz78/CedarTrie.hpp
@@ -39,12 +39,10 @@ class LzwRootSearchPosMap {
std::array<CedarSearchPos, 256> m_array;
public:
inline CedarSearchPos get(uliteral_t c) {
- DCHECK(0 <= c);
DCHECK(c < m_array.size());
return m_array[c];
}
inline void set(uliteral_t c, CedarSearchPos v) {
- DCHECK(0 <= c);
DCHECK(c < m_array.size());
m_array[c] = v;
}
@@ -181,7 +179,7 @@ public:
return m;
}
- CedarTrie(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
+ CedarTrie(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t = 0)
: Algorithm(std::move(env))
, LZ78Trie(n, remaining_characters)
, m_trie(std::make_unique<cedar_t>()) {}
diff --git a/include/tudocomp/compressors/lz78/squeeze_node.hpp b/include/tudocomp/compressors/lz78/squeeze_node.hpp
index b8aef03d..2b13c7bf 100644
--- a/include/tudocomp/compressors/lz78/squeeze_node.hpp
+++ b/include/tudocomp/compressors/lz78/squeeze_node.hpp
@@ -16,13 +16,13 @@ typedef uint_t<40> squeeze_node_t; // TODO: change this to bits_for(literal_t) +
#ifndef ALPHABET_BITS
- #define ALPHABET_BITS (sizeof(literal_t)*8)
+ #define ALPHABET_BITS (sizeof(uliteral_t)*8)
#endif //TODO alphabet_bits -> effective alphabet size
inline factorid_t get_id(squeeze_node_t data) {
return static_cast<uint64_t>(data)>>ALPHABET_BITS;
}
-inline literal_t get_letter(squeeze_node_t data) {
+inline uliteral_t get_letter(squeeze_node_t data) {
return static_cast<char>(static_cast<uint64_t>(data)) & 0xff; //TODO 0xff hard coded
}
inline squeeze_node_t create_node(factorid_t id, uliteral_t c) {
diff --git a/include/tudocomp/def.hpp b/include/tudocomp/def.hpp
index 8aa5a9c5..75f6b772 100644
--- a/include/tudocomp/def.hpp
+++ b/include/tudocomp/def.hpp
@@ -65,30 +65,28 @@ namespace tdc {
constexpr size_t LEN_BITS = 8 * sizeof(len_t);
- /// Type to represent signed single literals.
+ /// Type to represent unsigned single literals.
- typedef char literal_t;
-
- /// Type to represent unsigned single literals.
- typedef std::make_unsigned<literal_t>::type uliteral_t;
+ typedef uint8_t uliteral_t;
/// The maximum value of \ref uliteral_t.
constexpr size_t ULITERAL_MAX = std::numeric_limits<uliteral_t>::max();
- /// Converts a literal to an unsigned integer value.
+ /// Converts a literal to an integer value as if unsigned.
///
- /// \tparam the literal type.
+ /// \tparam T the integer type.
/// \param c the literal.
/// \return the corresponding unsigned integer value.
- template<class T>
- inline size_t literal2int(const T& c) {
- return static_cast<size_t>(c);
- }
+ template<typename T = size_t>
+ constexpr T literal2int(uliteral_t c) {
+ return std::make_unsigned_t<T>(c);
+ }
- /// Converts a signed literal to an unsigned integer value.
+ /// Converts an integer value to a literal as if unsigned.
///
- /// \param c the literal.
- /// \return the corresponding unsigned integer value.
- template<>
- inline size_t literal2int(const literal_t& c) {
- return static_cast<size_t>(static_cast<uliteral_t>(c));
- }
+ /// \tparam T the integer type.
+ /// \param c the integer value.
+ /// \return the corresponding literal.
+ template<typename T = size_t>
+ constexpr uliteral_t int2literal(const T& c) {
+ return std::make_unsigned_t<T>(c);
+ }
}
diff --git a/include/tudocomp/ds/BitPackingVector.hpp b/include/tudocomp/ds/BitPackingVector.hpp
index 2108ba6e..89109ead 100644
--- a/include/tudocomp/ds/BitPackingVector.hpp
+++ b/include/tudocomp/ds/BitPackingVector.hpp
@@ -53,7 +53,7 @@ namespace int_vector {
m_vec(std::move(other.m_vec)), m_real_size(other.m_real_size) {}
inline uint8_t raw_width() const { return N; }
- inline void set_width_raw(uint8_t width) { }
+ inline void set_width_raw(uint8_t) { }
};
diff --git a/include/tudocomp/ds/IntPtr.hpp b/include/tudocomp/ds/IntPtr.hpp
index 4b1a157a..74dffcce 100644
--- a/include/tudocomp/ds/IntPtr.hpp
+++ b/include/tudocomp/ds/IntPtr.hpp
@@ -38,7 +38,7 @@ namespace tdc {
sdsl::bits::write_int(word, x, offset, len);
}
template<>
- inline void write_int<uint_t<1>>(uint64_t* word, uint64_t v, uint8_t o, const uint8_t len) {
+ inline void write_int<uint_t<1>>(uint64_t* word, uint64_t v, uint8_t o, const uint8_t) {
auto& p = *word;
const auto mask = uint64_t(1) << o;
@@ -53,7 +53,7 @@ namespace tdc {
}
template<>
- inline uint64_t read_int<uint_t<1>>(const uint64_t* word, uint8_t o, const uint8_t len) {
+ inline uint64_t read_int<uint_t<1>>(const uint64_t* word, uint8_t o, const uint8_t) {
const auto p = *word;
const auto mask = uint64_t(1) << o;
@@ -71,7 +71,7 @@ struct RefDispatch {
v,
self.m_ptr.m_bit_offset,
self.m_ptr.data_bit_size());
- };
+ }
template<class Ref, class R>
inline static R cast_for_op(const Ref& self) {
@@ -159,7 +159,7 @@ namespace int_vector {
private:
//const uint8_t m_bit_size;
public:
- Data(const DynamicIntValueType* ptr, uint8_t offset, uint8_t size):
+ Data(const DynamicIntValueType* ptr, uint8_t offset, uint8_t /*size*/):
m_ptr(ptr), m_bit_offset(offset) /*, m_bit_size(size)*/ {}
inline uint8_t data_bit_size() const { return N; }
inline Data data_offset_to(const DynamicIntValueType* ptr, uint8_t offset) const {
@@ -177,7 +177,7 @@ namespace int_vector {
private:
//const uint8_t m_bit_size;
public:
- Data(DynamicIntValueType* ptr, uint8_t offset, uint8_t size):
+ Data(DynamicIntValueType* ptr, uint8_t offset, uint8_t /*size*/):
m_ptr(ptr), m_bit_offset(offset) /*, m_bit_size(size)*/ {}
inline uint8_t data_bit_size() const { return N; }
inline Data data_offset_to(DynamicIntValueType* ptr, uint8_t offset) {
@@ -448,7 +448,7 @@ namespace int_vector {
template<class T>
inline IntRef<T>& IntRef<T>::operator=(const ConstIntRef<T>& other) {
return operator=(value_type(other));
- };
+ }
template<class T>
inline IntRef<T> IntPtr<T>::operator*() {
diff --git a/include/tudocomp/ds/LCPSada.hpp b/include/tudocomp/ds/LCPSada.hpp
index 33a3cdfa..6f935317 100644
--- a/include/tudocomp/ds/LCPSada.hpp
+++ b/include/tudocomp/ds/LCPSada.hpp
@@ -174,7 +174,7 @@ inline static sdsl::bit_vector construct_plcp_bitvector(const plcp_t& plcp) {
}
template<class sa_t, class text_t, class select_t = sdsl::select_support_mcl<1,1>>
-sdsl::bit_vector construct_plcp_bitvector(Env& env, const sa_t& sa, const text_t& text) {
+sdsl::bit_vector construct_plcp_bitvector(Env&, const sa_t& sa, const text_t& text) {
typedef DynamicIntVector phi_t;
phi_t phi = StatPhase::wrap("Construct Phi Array", [&]{
diff --git a/include/tudocomp/ds/dynamic_t.hpp b/include/tudocomp/ds/dynamic_t.hpp
index 180940aa..9007ef79 100644
--- a/include/tudocomp/ds/dynamic_t.hpp
+++ b/include/tudocomp/ds/dynamic_t.hpp
@@ -14,7 +14,7 @@ struct DyntDispatch {
template<class Ref, class V>
inline static void assign(Ref& self, V v) {
self.m_data = v;
- };
+ }
template<class Ref, class R>
inline static R cast_for_op(const Ref& self) {
diff --git a/include/tudocomp/ds/uint_t.hpp b/include/tudocomp/ds/uint_t.hpp
index a2fd364a..65cb8085 100644
--- a/include/tudocomp/ds/uint_t.hpp
+++ b/include/tudocomp/ds/uint_t.hpp
@@ -17,7 +17,7 @@ struct UinttDispatch {
template<class Ref, class V>
inline static void assign(Ref& self, V v) {
self.m_data = v;
- };
+ }
template<class Ref, class R>
inline static R cast_for_op(const Ref& self) {
diff --git a/include/tudocomp/io/InputSource.hpp b/include/tudocomp/io/InputSource.hpp
index 59676e26..a0a36682 100644
--- a/include/tudocomp/io/InputSource.hpp
+++ b/include/tudocomp/io/InputSource.hpp
@@ -61,7 +61,7 @@ namespace tdc {namespace io {
&& lhs.m_view.size() == rhs.m_view.size()
&& lhs.m_path == rhs.m_path
&& lhs.m_stream == rhs.m_stream;
- };
+ }
inline std::ostream& operator<<(std::ostream& o, const InputSource& v) {
if (v.is_view()) {
diff --git a/include/tudocomp/io/ViewStream.hpp b/include/tudocomp/io/ViewStream.hpp
index cb9f1657..1cca02ce 100644
--- a/include/tudocomp/io/ViewStream.hpp
+++ b/include/tudocomp/io/ViewStream.hpp
@@ -28,6 +28,8 @@ class ViewStream {
virtual inline std::streampos seekpos(std::streampos sp,
std::ios_base::openmode which) override
{
+ DCHECK(which == (std::ios_base::in | std::ios_base::out));
+
auto begin = eback();
auto end = egptr();
if ((size_t(begin) + sp) > size_t(end)) {
diff --git a/include/tudocomp/pre_header/GenericViewBase.hpp b/include/tudocomp/pre_header/GenericViewBase.hpp
index 946d8922..d5dab002 100644
--- a/include/tudocomp/pre_header/GenericViewBase.hpp
+++ b/include/tudocomp/pre_header/GenericViewBase.hpp
@@ -58,7 +58,7 @@ protected:
}
}
- inline void debug_bound_check(size_t pos) const {
+ inline void debug_bound_check(size_t IF_DEBUG(pos)) const {
IF_DEBUG(bound_check(pos));
}
diff --git a/include/tudocomp/util/Hash.hpp b/include/tudocomp/util/Hash.hpp
index 06420ef7..4fd38ffc 100644
--- a/include/tudocomp/util/Hash.hpp
+++ b/include/tudocomp/util/Hash.hpp
@@ -393,7 +393,7 @@ class HashMap {
public:
IF_STATS(
size_t collisions() const { return m_collisions; }
- void collect_stats(Env& env) const {
+ void collect_stats(Env&) const {
StatPhase::log("collisions", collisions());
StatPhase::log("table size", table_size());
StatPhase::log("load factor", max_load_factor());
@@ -534,7 +534,7 @@ class HashMap {
if(tdc_unlikely(table_size()*max_load_factor() < m_entries)) {
auto toinsert = std::make_pair(m_keys[tablepos], m_values[tablepos]);
- size_t expected_size =
+ size_t expected_size =
std::is_same<SizeManager,SizeManagerDirect>::value ?
(m_entries + 3.0/2.0*lz78_expected_number_of_remaining_elements(entries(),m_n,m_remaining_characters))/0.95 :
(m_entries + lz78_expected_number_of_remaining_elements(entries(),m_n,m_remaining_characters))/0.95;
diff --git a/include/tudocomp/util/divsufsort.hpp b/include/tudocomp/util/divsufsort.hpp
index 03384fbd..7caa19eb 100644
--- a/include/tudocomp/util/divsufsort.hpp
+++ b/include/tudocomp/util/divsufsort.hpp
@@ -28,11 +28,11 @@
#pragma once
-#include <tudocomp/util/divsufsort_def.hpp>
-#include <tudocomp/util/divsufsort_private.hpp>
-#include <tudocomp/util/divsufsort_ssort.hpp>
-#include <tudocomp/util/divsufsort_trsort.hpp>
-#include <tudocomp/util/divsufsort_bufwrapper.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_private.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_ssort.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_trsort.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp>
#include <tudocomp/ds/IntVector.hpp>
diff --git a/include/tudocomp/util/divsufsort_bufwrapper.hpp b/include/tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp
similarity index 97%
rename from include/tudocomp/util/divsufsort_bufwrapper.hpp
rename to include/tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp
index 069f0d9b..81653abe 100644
--- a/include/tudocomp/util/divsufsort_bufwrapper.hpp
+++ b/include/tudocomp/util/divsufsort/divsufsort_bufwrapper.hpp
@@ -1,6 +1,6 @@
#pragma once
-#include <tudocomp/util/divsufsort_def.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp>
#include <tudocomp/ds/IntVector.hpp>
namespace tdc {
diff --git a/include/tudocomp/util/divsufsort_def.hpp b/include/tudocomp/util/divsufsort/divsufsort_def.hpp
similarity index 97%
rename from include/tudocomp/util/divsufsort_def.hpp
rename to include/tudocomp/util/divsufsort/divsufsort_def.hpp
index 9f93f9ad..78343944 100644
--- a/include/tudocomp/util/divsufsort_def.hpp
+++ b/include/tudocomp/util/divsufsort/divsufsort_def.hpp
@@ -6,7 +6,7 @@ namespace tdc {
namespace libdivsufsort {
// core type definitions
-using saidx_t = std::make_signed<len_t>::type;
+using saidx_t = ssize_t;
using saint_t = int;
using sauchar_t = uliteral_t;
diff --git a/include/tudocomp/util/divsufsort_private.hpp b/include/tudocomp/util/divsufsort/divsufsort_private.hpp
similarity index 100%
rename from include/tudocomp/util/divsufsort_private.hpp
rename to include/tudocomp/util/divsufsort/divsufsort_private.hpp
diff --git a/include/tudocomp/util/divsufsort_ssort.hpp b/include/tudocomp/util/divsufsort/divsufsort_ssort.hpp
similarity index 99%
rename from include/tudocomp/util/divsufsort_ssort.hpp
rename to include/tudocomp/util/divsufsort/divsufsort_ssort.hpp
index b213146e..2bfa89e4 100644
--- a/include/tudocomp/util/divsufsort_ssort.hpp
+++ b/include/tudocomp/util/divsufsort/divsufsort_ssort.hpp
@@ -28,8 +28,8 @@
#pragma once
-#include <tudocomp/util/divsufsort_def.hpp>
-#include <tudocomp/util/divsufsort_private.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_private.hpp>
namespace tdc {
namespace libdivsufsort {
diff --git a/include/tudocomp/util/divsufsort_trsort.hpp b/include/tudocomp/util/divsufsort/divsufsort_trsort.hpp
similarity index 99%
rename from include/tudocomp/util/divsufsort_trsort.hpp
rename to include/tudocomp/util/divsufsort/divsufsort_trsort.hpp
index 1e4ddc33..f06b739b 100644
--- a/include/tudocomp/util/divsufsort_trsort.hpp
+++ b/include/tudocomp/util/divsufsort/divsufsort_trsort.hpp
@@ -28,8 +28,8 @@
#pragma once
-#include <tudocomp/util/divsufsort_def.hpp>
-#include <tudocomp/util/divsufsort_private.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_def.hpp>
+#include <tudocomp/util/divsufsort/divsufsort_private.hpp>
namespace tdc {
namespace libdivsufsort {
diff --git a/include/tudocomp_stat/Json.hpp b/include/tudocomp_stat/Json.hpp
index 5818ad8e..1015e7d4 100644
--- a/include/tudocomp_stat/Json.hpp
+++ b/include/tudocomp_stat/Json.hpp
@@ -53,7 +53,7 @@ public:
///
/// \return the string representation of the contained value
virtual inline void str(
- std::ostream& s, unsigned int level = 0) const override {
+ std::ostream& s, unsigned int = 0) const override {
s << m_value;
}
@@ -64,7 +64,7 @@ const char quote_char = '\"';
const std::string quote_escape = "\\\"";
template<>
-inline void TValue<char>::str(std::ostream& s, unsigned int level) const {
+inline void TValue<char>::str(std::ostream& s, unsigned int) const {
s << quote_char;
if(m_value == quote_char) {
@@ -89,10 +89,10 @@ inline TValue<std::string>::TValue(const std::string& value) {
m_value.replace(x, 1, quote_escape);
pos = x+2;
}
-};
+}
template<>
-inline void TValue<std::string>::str(std::ostream& s, unsigned int level) const {
+inline void TValue<std::string>::str(std::ostream& s, unsigned int) const {
s << quote_char << m_value << quote_char;
}
/// \endcond
diff --git a/test/doc_snippets/coder_impl.cpp b/test/doc_snippets/coder_impl.cpp
index aae21d85..3fc302ae 100644
--- a/test/doc_snippets/coder_impl.cpp
+++ b/test/doc_snippets/coder_impl.cpp
@@ -56,7 +56,7 @@ public:
}
template<typename value_t>
- inline void encode(value_t v, const BitRange& r) {
+ inline void encode(value_t v, const BitRange&) {
// Encode single bits as ASCII
m_out->write_int(v ? '1' : '0');
}
@@ -82,7 +82,7 @@ public:
}
template<typename value_t>
- inline value_t decode(const BitRange& r) {
+ inline value_t decode(const BitRange&) {
// Decode an ASCII character and compare against '0'
uint8_t b = m_in->read_int<uint8_t>();
return (b != '0');
@@ -92,7 +92,7 @@ public:
TEST(doc_coder_impl, test) {
std::stringstream ss;
-
+
Range r1(75, 125);
Range r2(699, 702);
diff --git a/test/doc_snippets/stats.cpp b/test/doc_snippets/stats.cpp
index fc1b1372..6d9f22ab 100644
--- a/test/doc_snippets/stats.cpp
+++ b/test/doc_snippets/stats.cpp
@@ -54,7 +54,7 @@ TEST(stats, example) {
StatPhase::wrap("Phase 3", []{
// Phase 3.1 yields a complex result
StatPhase sub_phase("Phase 3.1");
-
+
char* result_part_1 = new char[1024];
char* result_part_2 = new char[2048];
std::this_thread::sleep_for(std::chrono::milliseconds(40));
@@ -76,7 +76,7 @@ TEST(stats, pause_resume) {
// Allocate memory, but only track mem2
StatPhase::pause_tracking();
char* mem1 = new char[1024];
- StatPhase::resume_tracking();
+ StatPhase::resume_tracking();
char* mem2 = new char[2048];
diff --git a/test/ds_tests.cpp b/test/ds_tests.cpp
index fb2495a0..c117dafa 100644
--- a/test/ds_tests.cpp
+++ b/test/ds_tests.cpp
@@ -87,7 +87,7 @@ void test_sa(const std::string& str, textds_t& t) {
}
template<class textds_t>
-void test_isa(const std::string& str, textds_t& t) {
+void test_isa(const std::string&, textds_t& t) {
auto& isa = t.require_isa();
auto& sa = t.require_sa(); //request afterwards!
diff --git a/test/generic_int_vector_tests.cpp b/test/generic_int_vector_tests.cpp
index 202d1b57..23e6c03f 100644
--- a/test/generic_int_vector_tests.cpp
+++ b/test/generic_int_vector_tests.cpp
@@ -279,7 +279,7 @@ namespace tdc {
template<class Ref, class V>
inline static void assign(Ref& self, V v) {
*self.m_ptr = v;
- };
+ }
template<class Ref, class R>
inline static R cast_for_op(const Ref& self) {
@@ -540,7 +540,7 @@ TEST(integer_base, bit_ops_assign) {
template<class T> struct tn { constexpr const static char* str = "unknown"; };
template<> struct tn<uint32_t> { constexpr const static char* str = "uint32_t"; };
template<> struct tn<uint64_t> { constexpr const static char* str = "uint64_t"; };
-template<class T> std::string type_name(T t) { return tn<T>::str; }
+template<class T> std::string type_name(T) { return tn<T>::str; }
TEST(uint_t, b24) {
uint_t<24> v;
diff --git a/test/test/driver_util.hpp b/test/test/driver_util.hpp
index 7535c441..793a2985 100644
--- a/test/test/driver_util.hpp
+++ b/test/test/driver_util.hpp
@@ -139,16 +139,16 @@ std::string format_escape(const std::string& s) {
}
struct Error {
- bool has_error;
- std::string test;
- std::string message;
- std::string compress_cmd;
- std::string compress_stdout;
- std::string decompress_cmd;
- std::string decompress_stdout;
- std::string text;
- std::string roundtrip_text;
- std::string algo;
+ bool has_error = false;
+ std::string test = "";
+ std::string message = "";
+ std::string compress_cmd = "";
+ std::string compress_stdout = "";
+ std::string decompress_cmd = "";
+ std::string decompress_stdout = "";
+ std::string text = "";
+ std::string roundtrip_text = "";
+ std::string algo = "";
void print_error() {
auto& e = *this;
diff --git a/test/tudocomp_driver_tests.cpp b/test/tudocomp_driver_tests.cpp
index a17f101a..f553a23c 100644
--- a/test/tudocomp_driver_tests.cpp
+++ b/test/tudocomp_driver_tests.cpp
@@ -158,7 +158,7 @@ TEST(Registry, dynamic_options) {
}
using Compressor::Compressor;
- inline virtual void decompress(Input& input, Output& output) {}
+ inline virtual void decompress(Input&, Output&) {}
inline virtual void compress(Input& input, Output& output) {
auto s = output.as_stream();
diff --git a/test/tudocomp_tests.cpp b/test/tudocomp_tests.cpp
index 6207706d..eb303ef8 100644
--- a/test/tudocomp_tests.cpp
+++ b/test/tudocomp_tests.cpp
@@ -342,7 +342,7 @@ namespace input_nte_matrix {
i_copy_strat(std::move(input),
expected_output,
- [](Input& i) {},
+ [](Input&) {},
i_out_compare);
}
@@ -1188,9 +1188,9 @@ struct MyCompressor: public Compressor {
Compressor(std::move(env)),
custom_data(std::move(s)) {}
- inline virtual void decompress(Input& input, Output& output) {}
+ inline virtual void decompress(Input&, Output&) {}
- inline virtual void compress(Input& input, Output& output) {
+ inline virtual void compress(Input&, Output& output) {
A a(env().env_for_option("sub"));
auto s = output.as_stream();
s << "ok! " << custom_data << " " << env().option("dyn").as_string();
@@ -1327,8 +1327,8 @@ struct EscapingComp: public Compressor {
using Compressor::Compressor;
- virtual void compress(Input& i, Output& o) {}
- virtual void decompress(Input& i, Output& o) {}
+ virtual void compress(Input&, Output&) {}
+ virtual void decompress(Input&, Output&) {}
};
TEST(Escaping, option_value_direct) {
@@ -1430,10 +1430,10 @@ struct KeywordlessEvalOrderBug: public Compressor {
KeywordlessEvalOrderBug(Env&& env): Compressor(std::move(env)){}
- inline virtual void decompress(Input& input, Output& output) {
+ inline virtual void decompress(Input&, Output&) {
}
- inline virtual void compress(Input& input, Output& output) {
+ inline virtual void compress(Input&, Output&) {
auto a = env().option("sub1").as_algorithm();
auto b = env().option("dyn").as_string();
auto c = env().option("sub2").as_algorithm();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment