Skip to content

Instantly share code, notes, and snippets.

@Toparvion
Last active November 26, 2023 11:08
Show Gist options
  • Save Toparvion/8c79ef0553caf0f3108a4bdbad6bb6d0 to your computer and use it in GitHub Desktop.
Save Toparvion/8c79ef0553caf0f3108a4bdbad6bb6d0 to your computer and use it in GitHub Desktop.
A simple benchmark for comparing compression ratios of various compressing algorithms applied to a natural text
package pro.toparvion.stegotext.compress;
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.commons.compress.utils.IOUtils;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static java.nio.file.StandardOpenOption.*;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
/**
* A simple benchmark for comparing compression ratios of various compressing algorithms applied to a natural text
*
* @author Toparvion
*/
@TestInstance(PER_CLASS)
public class CompressTest {
  private static final Logger log = LoggerFactory.getLogger(CompressTest.class);

  /** Sample sizes, in bytes, cut from the source text and fed to each compressor. */
  private static final List<Integer> INPUT_SIZES =
      List.of(20, 50, 100, 200, 300, 500, 1000, 2000, 5000, 10_000, 50_000, 100_000);

  /**
   * Offset into the source text where samples start — presumably chosen to skip
   * front matter (title page, TOC) of the book file; TODO confirm for other inputs.
   */
  private static final int SAMPLE_OFFSET = 31_810;

  private static final CompressorStreamFactory COMPRESSOR_FACTORY = new CompressorStreamFactory();
  private static final Path SOURCE_TEXT_PATH = Path.of("sandbox/doyle-return-388.txt");
  // private static final Path SOURCE_TEXT_PATH = Path.of("sandbox/pepko.txt");

  /** Accumulated CSV rows: one header row plus one row per tested algorithm. */
  private final List<List<String>> csvOut = new ArrayList<>();
  private byte[] textSampleBytes;

  /**
   * Prepares the CSV header row and loads the whole source text into memory.
   *
   * @throws IOException if the source text file cannot be read
   * @throws IllegalStateException if the source text is too short to provide the largest sample
   */
  @BeforeAll
  void beforeAll() throws IOException {
    // Header row: "Algo" followed by every sample size, matching the per-algorithm rows below
    List<String> header = new ArrayList<>(INPUT_SIZES.size() + 1);
    header.add("Algo");
    INPUT_SIZES.stream()
        .map(String::valueOf)
        .forEach(header::add);
    csvOut.add(header);

    textSampleBytes = Files.readAllBytes(SOURCE_TEXT_PATH);
    // Fail fast: Arrays.copyOfRange would silently zero-pad past EOF and skew the results
    int requiredLength = SAMPLE_OFFSET + INPUT_SIZES.get(INPUT_SIZES.size() - 1);
    if (textSampleBytes.length < requiredLength) {
      throw new IllegalStateException("Source text '%s' is too short: %d < %d bytes"
          .formatted(SOURCE_TEXT_PATH, textSampleBytes.length, requiredLength));
    }
  }

  /**
   * Dumps all gathered rows to {@code compress.csv}, overwriting any previous run.
   *
   * @throws IOException if the CSV file cannot be written
   */
  @AfterAll
  void afterAll() throws IOException {
    List<String> csvLines = csvOut.stream()
        .map(line -> String.join(",", line))
        .toList();
    Path csvFilePath = Path.of("compress.csv");
    Files.write(csvFilePath, csvLines, CREATE, WRITE, TRUNCATE_EXISTING);
    log.info("Written {} lines to '{}'", csvLines.size(), csvFilePath);
  }

  /**
   * For a single algorithm: compresses every sample size, verifies lossless round-trip,
   * logs the compression delta and records the compressed sizes as one CSV row.
   *
   * @param algo compressor name as understood by {@link CompressorStreamFactory}
   */
  @ParameterizedTest(name = "Algorithm: {0}")
  @ValueSource(strings = {
      // CompressorStreamFactory.BROTLI, // read-only
      CompressorStreamFactory.BZIP2,
      CompressorStreamFactory.DEFLATE,
      CompressorStreamFactory.GZIP,
      CompressorStreamFactory.LZMA,
      CompressorStreamFactory.LZ4_BLOCK,
      CompressorStreamFactory.LZ4_FRAMED,
      CompressorStreamFactory.SNAPPY_FRAMED,
      // CompressorStreamFactory.SNAPPY_RAW, // Compressor: snappy-raw not found.
      CompressorStreamFactory.XZ,
      // CompressorStreamFactory.Z, // read-only
      CompressorStreamFactory.ZSTANDARD
  })
  @DisplayName("Compression test suite for Apache Commons Compress")
  void testCompression(String algo) throws CompressorException, IOException {
    List<String> csvLine = new ArrayList<>(INPUT_SIZES.size() + 1);
    csvLine.add(algo);
    for (int inputSize : INPUT_SIZES) {
      // given
      byte[] sourceBytes = Arrays.copyOfRange(textSampleBytes, SAMPLE_OFFSET, SAMPLE_OFFSET + inputSize);
      // when
      var compressedSourceBytes = compress(sourceBytes, algo);
      var decompressedSourceBytes = decompress(compressedSourceBytes, algo);
      // then
      int sourceLength = sourceBytes.length;
      int resultLength = compressedSourceBytes.length;
      // Positive delta = space saved; negative = the algorithm's overhead exceeded its gain
      double delta = ((sourceLength - resultLength) / (double) sourceLength) * 100.0;
      log.info("Algo: {}, source size: {}, compressed size: {}, delta: {}", algo, sourceLength, resultLength, delta);
      assertArrayEquals(sourceBytes, decompressedSourceBytes);
      csvLine.add(String.valueOf(resultLength));
    }
    csvOut.add(csvLine);
  }

  /**
   * Compresses the given bytes in memory with the given algorithm.
   * Closing the compressor stream is required to flush trailer/footer bytes.
   *
   * @param sourceBytes raw input
   * @param algo        compressor name for {@link CompressorStreamFactory}
   * @return the complete compressed representation
   */
  private byte[] compress(byte[] sourceBytes, String algo) throws CompressorException, IOException {
    // No buffering wrappers needed: both ends are in-memory byte-array streams
    var outStream = new ByteArrayOutputStream();
    try (var compressStream = COMPRESSOR_FACTORY.createCompressorOutputStream(algo, outStream)) {
      // InputStream.transferTo replaces commons-compress IOUtils.copy, deprecated since 1.25
      new ByteArrayInputStream(sourceBytes).transferTo(compressStream);
    }
    return outStream.toByteArray();
  }

  /**
   * Decompresses bytes previously produced by {@link #compress(byte[], String)}.
   *
   * @param compressed compressed input
   * @param algo       the same compressor name used for compression
   * @return the restored original bytes
   */
  private byte[] decompress(byte[] compressed, String algo) throws CompressorException, IOException {
    var inStream = new ByteArrayInputStream(compressed);
    var outStream = new ByteArrayOutputStream();
    try (var compressStream = COMPRESSOR_FACTORY.createCompressorInputStream(algo, inStream)) {
      compressStream.transferTo(outStream);
    }
    return outStream.toByteArray();
  }
}
@Toparvion
Copy link
Author

Dependencies

In Gradle Kotlin DSL format:

    testImplementation("org.apache.commons:commons-compress:1.25.0")
    testImplementation("org.tukaani:xz:1.9")
    testImplementation("org.xerial.snappy:snappy-java:1.1.10.5")
    testImplementation("com.github.luben:zstd-jni:1.5.5-10")

Input

  • A text file in the sandbox directory (see the SOURCE_TEXT_PATH constant)

Output

  • The compress.csv file with all the gathered data (the lengths of result byte arrays)

Sample output

The CSV output may look like this:

Algo,20,50,100,200,300,500,1000,2000,5000,10000,50000,100000
bzip2,57,82,116,179,241,344,586,1005,2351,4297,17743,32567
deflate,28,56,89,149,205,312,562,1014,2464,4660,20516,39205
gz,40,68,101,161,217,324,574,1026,2476,4672,20528,39217
lzma,44,71,112,187,255,377,651,1112,2526,4624,19281,35895
lz4-block,22,52,102,201,298,469,875,1564,3665,6665,26667,48509
lz4-framed,39,69,119,219,317,488,894,1583,3684,6684,26686,48522
snappy-framed,40,69,118,217,309,475,865,1529,3675,6812,28066,55491
xz,76,108,152,232,300,424,696,1160,2572,4668,19328,35940
zstd,29,59,89,147,215,322,580,1044,2509,4727,20981,39989

The raw compressed sizes can be converted to compression ratios in the same way the delta variable is computed in the test:

double delta = ((sourceLength - resultLength) / (double) sourceLength) * 100.00;

This allows the results to be presented in a more readable way, e.g. as a table:

image

or as a chart:

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment