@Brixomatic
Last active February 17, 2021
Java translation of the Tinycrunch 1.2 encoder; requires tc_boot.prg from the original Tinycrunch 1.2 distribution.
package de.plush.brix.c64graphics.core.pipeline.stages.compression;
import static de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2.Stats.StatId.*;
import static de.plush.brix.c64graphics.core.pipeline.stages.compression.Tinycrunch_1_2.Stats.StatId.load; // single-static-import wins over the on-demand imports, disambiguating StatId.load vs. System.load
import static de.plush.brix.c64graphics.core.util.Utils.subList;
import static java.lang.Float.POSITIVE_INFINITY;
import static java.lang.Integer.parseInt;
import static java.lang.Math.*;
import static java.lang.String.format;
import static java.lang.System.*;
import static java.nio.file.Paths.get;
import static java.nio.file.StandardOpenOption.*;
import static java.util.Arrays.stream;
import static java.util.Map.entry;
import static net.sourceforge.argparse4j.impl.Arguments.*;
import java.io.*;
import java.net.URISyntaxException;
import java.nio.file.*;
import java.util.*;
import java.util.function.*;
import org.eclipse.collections.api.list.primitive.*;
import org.eclipse.collections.api.tuple.primitive.ByteObjectPair;
import org.eclipse.collections.impl.factory.primitive.*;
import org.eclipse.collections.impl.list.mutable.primitive.ByteArrayList;
import org.eclipse.collections.impl.map.mutable.primitive.ObjectIntHashMap;
import de.plush.brix.c64graphics.core.util.Utils;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.inf.*;
/**
* Java implementation of the TinyCrunch 1.2 encoder.<br>
*
* @see <a href = "https://csdb.dk/release/?id=168629">https://csdb.dk/release/?id=168629</a>
* @author Wanja Gayk
* @author ©2018 Christopher Jam, original Python code
*
*/
// https://gist.github.com/Brixomatic/2dfc58bd9c0f830695d812489a0cf82a
@SuppressWarnings({ "checkstyle:npathComplexity", "checkstyle:cyclomaticComplexity" })
public class Tinycrunch_1_2 implements Function<byte[], byte[]> {
private boolean fast;
private boolean verbose;
/**
* Create a new Tinycrunch 1.2 compression stage with optimum (slow) compression and basic output.
*
* @see #withFastMode(boolean)
* @see #withVerboseOutput(boolean)
*/
public Tinycrunch_1_2() {}
private Tinycrunch_1_2(final boolean fast, final boolean verbose) {
this.fast = fast;
this.verbose = verbose;
}
/**
* @param fast
* <tt>true</tt> to switch to greedy (fast) compression, trading compression ratio for more speed
* @return a new instance of this class with this configuration set accordingly.
*/
public Tinycrunch_1_2 withFastMode(final boolean fast) {
return new Tinycrunch_1_2(fast, verbose);
}
/**
* @param verbose
* <tt>true</tt> to get a more detailed report at the end of crunching.
* @return a new instance of this class with this configuration set accordingly.
*/
public Tinycrunch_1_2 withVerboseOutput(final boolean verbose) {
return new Tinycrunch_1_2(fast, verbose);
}
@Override
public byte[] apply(final byte[] input) {
final byte[] packed = toEncodedBinary(input);
out.println("raw length: " + input.length + " -> " + packed.length + " (bytes)");
return packed;
}
private byte[] toEncodedBinary(final byte[] rawInput) {
final MutableByteList inputData = ByteLists.mutable.of(rawInput);
out.println(format("%05d bytes read from stream", inputData.size()));
final var op = new CCruncher(fast);
final var args = new Namespace(Map.of("inPlace", false)); // raw_crunch asserts on "inPlace"; an empty map would yield null and NPE with assertions enabled
final var outputBytes = raw_crunch(args, op, inputData);
if (verbose) {
op.report(true);
}
out.println(format("%05d bytes written to stream (%4.1f%% of original size)", //
outputBytes.size(), outputBytes.size() * 100.0 / inputData.size()) //
);
return outputBytes.toArray();
}
/*
* NOTE: This is a pretty straight translation of the original Python code. The source contains several oddities that only exist
* because the Python language is as it is.
* The source should also not be "beautified", as it should be easy to compare with the original.
*/
static Path boot_path;
static {
try {
boot_path = get(Tinycrunch_1_2.class.getResource("resources/tc_boot.prg").toURI());
} catch (final URISyntaxException e) {
throw new IllegalStateException(e);
}
}
static CDataChunk load_prg(final File fi) throws IOException {
final var data = ByteLists.mutable.of(Files.readAllBytes(fi.toPath()));
final var addr = Byte.toUnsignedInt(data.get(0)) + (Byte.toUnsignedInt(data.get(1)) << 8); // bytes are signed in Java; note also that '+' binds tighter than '<<'
return new CDataChunk(addr, subList(data, 2, data.size()));
}
static void save_prg(final File fo, final CDataChunk data_chunk) throws IOException {
final var data = ByteLists.mutable.of((byte) (data_chunk.addr & 255), (byte) (data_chunk.addr >>> 8));
data.addAll(data_chunk.data);
Files.write(fo.toPath(), data.toArray(), WRITE, CREATE);
}
private static class CDataChunk {
int addr;
MutableByteList data;
String se; // source extent description. Doubles as a marker that this data has been compressed
public CDataChunk(final int addr, final MutableByteList data) {
this(addr, data, null);
}
public CDataChunk(final int addr, final MutableByteList data, final String se) {
this.addr = addr;
this.data = data;
this.se = se;
}
static CDataChunk ending_at(final int ea, final MutableByteList data, final String se) {
return new CDataChunk(ea - data.size(), data, se);
}
int end_addr() {
return addr + data.size();
}
String ext() {
return String.format("0x%04X-0x%04X", addr, end_addr());
}
CDataChunk[] split_at(final int addr) {
assert this.addr <= addr : "split address lower than start address";
assert addr <= end_addr() : "split address higher than end address";
final var sp = addr - this.addr;
final var lower = new CDataChunk(this.addr, subList(data, 0, sp)); // lower chunk keeps the original start address
final var higher = new CDataChunk(addr, subList(data, sp, data.size()));
return new CDataChunk[] { lower, higher };
}
void extend(final CDataChunk addend) {
assert end_addr() == addend.addr : "extension start address does not match end address";
data.addAll(addend.data);
}
}
private static class OptToken {
float cost;
ByteList data;
Integer next;
int length;
int offset;
public OptToken(final float cost, final ByteList data, final Integer next, final int length, final int offset) {
this.cost = cost;
this.data = data;
this.next = next;
this.length = length;
this.offset = offset;
}
@Override
public String toString() {
String dr;
if (data.size() < 5) {
dr = data.collect(Utils::toHexString).makeString(" ");
} else {
dr = data.size() + " bytes";
}
return String.format("OptToken(%d,[%s],%s,%d,%d)", cost, dr, next, length, offset);
}
}
private static class CCruncher {
int longest_copy = 17;
int max_pair_offset = 63;
int max_offset = 2048;
int longest_literal = 64;
boolean greedy;
Stats stats;
private boolean inPlace;
private MutableByteList output_bytes;
private CDataChunk input_chunk;
private MutableByteList data;
private MutableByteList remainder;
private MutableByteList literals;
public CCruncher(final boolean greedy) {
this.greedy = greedy;
reset_stats();
}
void reset_stats() {
stats = new Stats();
}
MutableByteList crunch(final CDataChunk input_chunk, final boolean inPlace) {
this.inPlace = inPlace;
output_bytes = ByteLists.mutable.of((byte) 0, (byte) 0, (byte) 0); // leave space for target address, and final byte
this.input_chunk = input_chunk;
final var split = input_chunk.split_at(max(0, input_chunk.data.size() - 1));
data = split[0].data; // everything except the last byte ( input_chunk.data[:-1] )
remainder = split[1].data; // last item in the array ( input_chunk.data[-1:] )
if (data.size() == 0) { return output_token_list(new ArrayList<OptToken>()); }
if (greedy) { return crunch_greedy(); }
return crunch_optimal();
}
/** "10xxxxxx" */
ByteList encode_literal(final ByteList literal) {
assert 0 < literal.size() && literal.size() <= 64 : "literal size out of bounds 0 < size <= 64, size: " + literal.size();
final byte count = (byte) (128 + literal.size() - 1);
final var encoding = ByteLists.mutable.of(count);
encoding.addAll(literal);
return encoding;
}
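// Example (from the encoding above): the 3-byte literal {0xAA, 0xBB, 0xCC} encodes as {0x82, 0xAA, 0xBB, 0xCC},
// since the count byte is 0x80 + (3 - 1) = 0x82 ("10xxxxxx" with xxxxxx = length - 1).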
/**
* "11oooooo" offsets &lt; 64 <br>
* "0aaaaabb bbbbbbbb" bigger offsets
*/
ByteList encode_copy(final int length, int offset) {
assert 1 < length && length <= longest_copy : "copy sequence out of bounds, 1 < lenth < " + longest_copy + ", length: " + length;
if (length == 2 && offset <= max_pair_offset) {
// "11oooooo"
assert offset < 64 : "offset >= 64, offset: " + offset;
final byte count = (byte) (0xc0 + offset ^ 63); // ie, offset ^ 0xff (0xc0's low six bits are clear, so the grouping doesn't matter)
return ByteLists.mutable.of(count);
}
// "0aaaaabb bbbbbbbb"
assert 0 < offset && offset < 2048 : "offset out of bounds : 0 < offset < 2048, offset: " + offset;
// bits 1.5.10
// assert(2<= length<32)
offset -= 1;
final byte hi = (byte) (0x00 + ((offset & 0x700) >>> 8 ^ 7) + (length - 2 << 3));
final byte lo = (byte) (offset & 0xff ^ 0xff);
return ByteLists.mutable.of(hi, lo);
}
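// Examples (from the encoding above): a 2-byte copy at offset 5 encodes as the single byte 0xFA ((0xC0 + 5) ^ 63 = 0xFF - 5);
// a 4-byte copy at offset 10 encodes as {0x17, 0xF6}: offset-1 = 9, hi = ((9 & 0x700) >>> 8 ^ 7) + ((4 - 2) << 3) = 0x17,
// lo = (9 & 0xFF) ^ 0xFF = 0xF6.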
/** greedy: at each point, just takes the longest string that can be encoded as a copy, otherwise accumulates literals */
private MutableByteList crunch_greedy() {
final var longestCopy = longest_copy;
final var maxOffset = max_offset;
final var tokens = new ArrayList<OptToken>();
literals = ByteLists.mutable.empty();
final Runnable tokenize_literal_if_present = () -> {
if (literals.size() > 0) {
tokens.add(new OptToken(0, encode_literal(literals), 0, literals.size(), 0));
literals = ByteLists.mutable.empty();
}
};
final var data = this.data;
final var l = data.size();
var nj = 0;
/*
* last_seen = defaultdict(lambda: -(2 ** 17))
* The argument of a defaultdict (in this case "lambda: -(2**17)")
* will be called when you try to access a key that doesn't exist
* and the return value of it will be set as the new value of this key.
* In Java we do that by using another function than get,
* e.g. computeIfAbsent in java.util or getIfAbsentPut in Eclipse collections
*/
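// e.g.: last_seen.getIfAbsentPut(key, -(1 << 17)) returns the index stored for key, or inserts and
// returns -131072 (= -(2**17)) the first time key is seen, so that the resulting offset j - (-131072)
// is far above maxOffset and the copy is rejected.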
final var last_seen = new ObjectIntHashMap<ByteList>(); // address key was last seen starting at
// kl = [tuple(data[j:j + i]) for i in range(longestCopy + 1) if j + i <= l]
// data = [1,2,3]
// kl = [(), (1,), (1,2), (1,2,3)]
// kl = [(), (2,), (2, 3)]
// kl = [(), (3,)]
for (int j = 0; j < l; ++j) {
final var kl = new ArrayList<ByteList>();
for (int i = 0; i < longestCopy + 1 && j + i <= l; ++i) {
kl.add(subList(data, j, j + i));
}
if (j == nj) {
var length = kl.size() - 1;
var noBreak = true;
while (length > 1) {
final int offset = j - last_seen.getIfAbsentPut(kl.get(length), -(1 << 17));
if (offset < maxOffset) {
final var token = encode_copy(length, offset);
if (token.get(0) != 0) {
out.println(token.collect(b -> Utils.toHexString(b)).makeString());
tokenize_literal_if_present.run();
assert offset > 0 : "offset expected to be > 0, offset: " + offset;
tokens.add(new OptToken(0, token, 0, length, offset));
nj = j + length;
noBreak = false;
break;
}
}
length -= 1;
}
if (noBreak) {
literals.add(data.get(j));
if (literals.size() == longest_literal) {
tokenize_literal_if_present.run();
}
nj = j + 1;
}
}
final var jj = j;
kl.subList(1, kl.size()).forEach(k -> last_seen.put(k, jj));
}
tokenize_literal_if_present.run();
return output_token_list(tokens);
}
private MutableByteList crunch_optimal() {
final var longestCopy = longest_copy;
final var maxPairOffset = max_pair_offset;
final var longestLiteral = longest_literal;
final var data = this.data;
final var l = data.size();
final var last_seen = new ObjectIntHashMap<ByteList>(); // address key was last seen starting at
/*
* cfile[j] contains the tail of a list that in turn contains the
* cheapest representation of the first j bytes of the file.
* A token's data contains the bytes that must be added to the stream to
* cover the bytes between that token and its predecessor.
*/
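// e.g. each OptToken's "next" field is the file index where its covered span begins; after the loop,
// following "next" from cfile.get(l) back to 0 recovers the cheapest token chain in reverse
// (see the backtracking below, which collects and then reverses the tokens).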
final var cfile = new ArrayList<>(List.of(new OptToken(0, null, null, 0, 0)));
OptToken best = null;
for (int j = 1; j < l + 1; ++j) {
final var copy_candidates = new ArrayList<ByteList>();
for (int i = 2; i < longestCopy + 1 && j - i >= 0; ++i) {
copy_candidates.add(subList(data, j - i, j));
}
best = new OptToken(POSITIVE_INFINITY, null, null, 0, 0);
for (final var k : copy_candidates) {
final var mra = last_seen.getIfAbsentPut(k, -(1 << 17));
final var length = k.size();
final var start_addr = j - length;
assert length > 1 : "length must be > 1, length: " + length;
final var offset = j - mra;
if (offset < max_offset) {
final var nb = length > 2 || offset > maxPairOffset ? 2.012f : 1.01f;
final var cost = cfile.get(start_addr).cost + nb;
if (cost < best.cost) {
final var token = encode_copy(length, offset);
if (token.get(0) != 0) {
best = new OptToken(cost, token, start_addr, length, offset);
assert mra - length < j : "Expected mra-length < j. mra: " + mra + " - length: " + length //
+ " = " + (mra - length) + ", j; " + j;
}
}
}
}
for (int length = 1; length < longestLiteral + 1; ++length) {
final var start_addr = j - length;
if (start_addr >= 0) {
final var cost = cfile.get(start_addr).cost + length + 1.01f;
if (cost < best.cost) {
final var literal = subList(data, start_addr, j);
best = new OptToken(cost, encode_literal(literal), start_addr, literal.size(), 0);
assert best.data.size() == length + 1 : "expected data.size() == length+1, data.size(): " + best.data.size() + ", length+1: " + (length + 1);
assert start_addr < j : "expected startAddr < j, startAddr: " + start_addr + ", j: " + j;
}
}
}
cfile.add(best);
for (final var k : copy_candidates) {
last_seen.put(k, j);
}
}
assert best != null : "expected best != null";
final var tokens = new ArrayList<>(Arrays.asList(best));
while (best.next != 0) {
best = cfile.get(best.next);
tokens.add(best);
}
Collections.reverse(tokens);
return output_token_list(tokens);
}
private MutableByteList output_token_list(final ArrayList<OptToken> tokens) {
var j = tokens.size();
var watermark = j; // from here on, just store raw data
var raw_bytes_after_watermark = 0;
if (inPlace) {
// Scan the token list from the end backwards.
// Whenever the compressed remainder is equal to or longer than the raw remainder,
// set that token as the start of the raw data.
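// e.g. if the final tokens would expand 5 raw bytes into 6 output bytes, the watermark moves in front
// of them and those 5 bytes are emitted uncompressed via the remainder instead.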
var raw_bytes_after_tokenJ = 0;
var comp_bytes_after_token_j = 0;
raw_bytes_after_watermark = 0;
while (j > 0) {
j -= 1;
raw_bytes_after_tokenJ += tokens.get(j).length;
comp_bytes_after_token_j += tokens.get(j).data.size();
if (raw_bytes_after_tokenJ <= comp_bytes_after_token_j) {
watermark = j;
raw_bytes_after_watermark += raw_bytes_after_tokenJ;
raw_bytes_after_tokenJ = 0;
comp_bytes_after_token_j = 0;
}
}
}
for (final var t : tokens.subList(0, watermark)) {
stats.log_token(t);
output_bytes.addAll(t.data);
}
if (inPlace && raw_bytes_after_watermark > 0) {
final var newRemainder = subList(data, data.size() - raw_bytes_after_watermark, data.size());
newRemainder.addAll(remainder);
remainder = newRemainder;
}
if (remainder.size() > 1) {
stats.log_raw(remainder.size() - 1);
}
stats.log_header(3);
stats.log_move(1);
output_bytes.set(0, remainder.get(0));
output_bytes.set(1, (byte) ((input_chunk.addr - 1) % 256));
output_bytes.set(2, (byte) (input_chunk.addr - 1 >>> 8));
stats.log_terminator();
remainder.set(0, (byte) 0); // terminator for compressed data
output_bytes.addAll(remainder);
remainder = null;
return output_bytes;
}
void report() {
report(false);
}
void report(final boolean raw) {
stats.report(raw);
}
}
static class Stats {
static class StatCounter {
Object legend;
int ct, bi, bo;
StatCounter(final Object legend) {
this.legend = legend;
reset();
}
void reset() {
ct = 0;
bi = 0;
bo = 0;
}
void accBi(final int bi) {
accCIO100(1, bi, 0);
}
void accBo(final int bo) {
accCIO100(1, 0, bo);
}
void accBiBo(final int bi, final int bo) {
accCIO100(1, bi, bo);
}
void accBiCt(final int bi, final int ct) {
accCIO100(ct, bi, 0);
}
void accCIO100(final int ct, final int bi, final int bo) {
this.ct += ct;
this.bi += bi;
this.bo += bo;
}
int cost() {
return max(0, bo - bi);
}
int savings() {
return max(0, bi - bo);
}
void print(final String fs, final IntFunction<String> ent, final IntFunction<String> ifp) {
final var l_c = ct;
final var l_o = bo;
final var l_i = bi;
if (l_c > 0) {
out.println(String.format(fs, legend, l_c, ent.apply(l_c), ifp.apply(l_i), ifp.apply(l_o - l_i), ifp.apply(l_i - l_o), ifp.apply(l_o)));
}
}
}
enum StatId {
pair, copy, literal, header, move, gap, boot, raw, end, load, save;
}
String rs;
Map<StatId, StatCounter> counts;
MutableIntList offsets;
MutableIntList litlens;
Stats() {
rs = "";
counts = Map.ofEntries(//
entry(pair, new StatCounter("2 byte copies")), //
entry(copy, new StatCounter("n byte copies")), //
entry(literal, new StatCounter("literal strings")), //
entry(header, new StatCounter("segment headers")), //
entry(move, new StatCounter("moved to header")), //
entry(gap, new StatCounter("gaps")), //
entry(boot, new StatCounter("boot")), //
entry(raw, new StatCounter("uncompressed")), //
entry(end, new StatCounter("terminators")), //
entry(load, new StatCounter("load address")), //
entry(save, new StatCounter("save address")) //
);
offsets = IntLists.mutable.empty();
litlens = IntLists.mutable.empty();
}
void log_token(final OptToken t) {
final var dl = t.data.size();
if (t.offset == 0) {
log_literal(t.length, dl);
} else if (dl == 1) {
log_pair(t.offset, dl);
} else {
log_copy(t.length, t.offset, dl);
}
}
private void log_pair(final int offset, final int dl) {
offsets.add(offset);
rs += '1';
counts.get(pair).accBiBo(2, dl);
assert dl == 1;
}
private void log_copy(final int length, final int offset, final int dl) {
offsets.add(offset);
rs += '@';
counts.get(copy).accBiBo(length, dl);
assert dl == 2;
}
private void log_literal(final int length, final int dl) {
litlens.add(length);
rs += '.';
assert dl == 1 + length;
counts.get(literal).accBiBo(length, dl);
}
void log_boot(final int length) {
counts.get(boot).accBo(length);
}
void log_gap(final int length) {
counts.get(gap).accBo(length);
}
void log_header(final int length) {
counts.get(header).accBo(length);
}
void log_move(final int length) {
counts.get(move).accBi(length);
}
void log_raw(final int length) {
counts.get(raw).accBiBo(length, length);
}
void log_terminator() {
counts.get(end).accBo(1);
}
void log_load_addr() {
counts.get(load).accBiCt(2, 1);
}
void log_save_addr() {
counts.get(save).accBo(2);
}
void report() {
report(false);
}
void report(final boolean raw_) {
final var g1 = new StatId[] { copy, pair, literal, end };
final var g2 = new StatId[] { move, load, save, boot, header, gap, raw };
if (raw_) {
stream(g2).forEach(k -> counts.get(k).reset());
}
final var symcount = stream(g1).mapToInt(k -> counts.get(k).ct).sum();
final var vi = counts.values();
final var s_c = vi.stream().mapToInt(c -> c.ct).sum();
final var s_i = vi.stream().mapToInt(c -> c.bi).sum();
final var cost = (float) vi.stream().mapToDouble(c -> c.cost()).sum();
final var savings = vi.stream().mapToInt(c -> c.savings()).sum();
final var s_o = vi.stream().mapToInt(c -> c.bo).sum();
assert round(s_i + cost - savings) == s_o : "input + cost - savings doesn't equal output"; // need to use round, for floating point inaccuracies
final IntFunction<String> ent = x -> x <= 0 ? " n/a" : "" + format("%7.3f", log((x + 1e-20) / (symcount + 1e-20)) / log(0.5));
final IntFunction<String> noent = x -> "";
final IntFunction<String> ifp = x -> x <= 0 ? "" : "" + x;
final var hr = "+-----------------+------------------+----------------------------------+";
final var fs = "|%16s | %7s %7s | %7s %7s %7s %7s |";
out.println();
out.println(hr);
out.println(format(fs, "", "count", "entropy", "input", "cost", "savings", "output"));
out.println(hr);
stream(g1).map(k -> counts.get(k)).forEach(c -> c.print(fs, ent, ifp));
out.println(hr);
if (!raw_) {
stream(g2).map(k -> counts.get(k)).forEach(c -> c.print(fs, noent, ifp));
}
out.println(format(fs, "total", s_c, "", s_i, round(cost), savings, s_o));
out.println(hr);
offsets.sortThis();
if (!offsets.isEmpty()) {
out.println(format("median, maximum offset used = % d, % d", offsets.get(floorDiv(offsets.size(), 2)), offsets.get(offsets.size() - 1)));
}
litlens.sortThis();
if (!litlens.isEmpty()) {
// in the original, the last arg for format is "self.litlens[-5:]", a list of up to five elements; the "{:}" format
// specifier does not suggest a list, but Python accepts one
out.println(format("median, maximum litlen used = % d, %s", litlens.get(floorDiv(litlens.size(), 2)), subList(litlens, max(0, litlens.size()
- 5), litlens.size())));
}
out.println();
}
}
static byte hi(final int x) {
return (byte) (x >>> 8 & 0xff);
}
static byte lo(final int x) {
return (byte) (x & 0xff);
}
static Namespace parse_args(final String[] args) throws ArgumentParserException {
final ArgumentType<Integer> hex = (final ArgumentParser parser, final Argument arg, final String value) -> parseInt(value, 16);
final ArgumentParser parser = ArgumentParsers.newFor("Java tinycrunch").build()//
.version("${prog} 1.2") //
.defaultHelp(true) //
.description(""); //
parser.addArgument("-V", "--version").action(version());
parser.addArgument("infile").type(fileType().verifyCanRead().verifyIsFile()).help("(.prg file unless -r is used)");
// parser.addArgument("outfile").type(fileType().verifyCanWrite().or().verifyCanCreate()).help("(.prg file unless -r is used");
parser.addArgument("outfile").type(fileType()).help("(.prg file unless -r is used)");
final var group = parser.addMutuallyExclusiveGroup().required(true);
group.addArgument("-s", "--startAddress").dest("startAddr").help("start address").type(hex).setDefault((Integer) null);
group.addArgument("-e", "--endAddress").dest("endAddr").help("end address").type(hex).setDefault((Integer) null);
group.addArgument("-i", "--inPlace").dest("inPlace").help("compress to end of destination area").action(storeTrue());
group.addArgument("-c", "--selfExtracting").action(storeTrue());
group.addArgument("-r", "--raw").action(storeTrue()).help("read/write .bin files, no header. cf readme.txt");
parser.addArgument("-j", "--jmp").dest("execAddr").help("execution address for self extracting .prg (requires -x)").setDefault("0x080d").type(hex);
parser.addArgument("-p", "--paramFile").type(fileType().verifyCanWrite().or().verifyCanCreate()).setDefault((Object) null).help(
"generated asm include file containing a define for the output start address");
parser.addArgument("-v", "--verbose").action(storeTrue());
parser.addArgument("-f", "--fast").action(storeTrue()).help("faster (greedy) compression (default is optimal size)");
return parser.parseArgs(args);
}
static CDataChunk level_crunch(final Namespace args, final CCruncher op, final CDataChunk input_chunk) {
final var output_bytes = op.crunch(input_chunk, args.getBoolean("inPlace"));
int la;
if (args.getBoolean("inPlace")) {
la = input_chunk.end_addr() - output_bytes.size();
} else if (args.getInt("endAddr") != null) {
la = args.getInt("endAddr") - output_bytes.size();
} else {
la = args.getInt("startAddr");
}
final var output_chunk = new CDataChunk(la, output_bytes);
if (args.get("paramFile") != null) {
try {
final Path path = args.<File> get("paramFile").toPath();
Files.writeString(path, format("dcSrc=$%04X%n", output_chunk.addr)); // asm include: define for the output start address
} catch (final Exception e) {
e.printStackTrace();
}
}
return output_chunk;
}
static MutableByteList raw_crunch(final Namespace args, final CCruncher op, final MutableByteList input_data) {
assert !args.getBoolean("inPlace") : "--inPlace makes no sense for raw mode";
assert args.get("paramFile") == null : "cannot generate paramFile for raw mode";
final var id = ByteLists.mutable.ofAll(input_data);
id.add((byte) 0); // add fake load address, and dummy byte-to-move-to-header
final var input_chunk = new CDataChunk(0, id); // fake load address = 0
final var output_bytes = op.crunch(input_chunk, false);
return subList(output_bytes, 3, output_bytes.size()); // drop header
}
/**
* Compresses the input file in two segments.
* The first contains a compressed copy of the data,
* to be decrunched to the area after the loaded file.
* The second contains the remaining data,
* compressed in place.<br>
* <br>
* It takes an iteration or three to find the optimal split point.
*
* @throws IOException
*/
static CDataChunk sfx_crunch(final Namespace args, final CCruncher op, final CDataChunk input_chunk) throws IOException {
final var __ = new Object() {
void dprint(final String... prargs) {
if (args.getBoolean("verbose")) {
stream(prargs).forEach(out::println);
if (prargs.length == 0) {
out.println();
}
}
}
void disp_chunks(final List<CDataChunk> chunks) {
chunks.forEach(chunk -> {
if (chunk.se == null) {
dprint(format("data segment at %s, uncompressed", chunk.ext()));
} else {
dprint(format("data segment at %s, decrunches to %s", chunk.ext(), chunk.se));
}
});
}
};
__.dprint();
__.dprint(input_chunk.ext());
final var boot_chunk = load_prg(boot_path.toFile());
final var oCo = boot_chunk.split_at(boot_chunk.end_addr() - 3);
final var output_chunk = oCo[0];
final var offsets = oCo[1];
final var patch_offsets = offsets.data;
final var oStart = patch_offsets.get(2);
__.dprint(format(" boot at %s", output_chunk.ext()));
final var data_start = output_chunk.end_addr();
final var monolith = new CDataChunk(data_start, op.crunch(input_chunk, false), input_chunk.ext());
final var monolith_stats = op.stats;
__.dprint(format(" monolith at %s", monolith.ext()));
List<CDataChunk> chunks;
if (input_chunk.addr >= monolith.end_addr() || input_chunk.end_addr() <= monolith.addr) {
__.dprint("(doesn't overlap output, using as is)");
chunks = List.of(monolith); // this is safe because it doesn't overlap the input chunk
} else {
var split_point = min(monolith.end_addr() + 12, input_chunk.end_addr() - 1); // assume compression is slightly worse
var max_gap = floorDiv(input_chunk.data.size(), 2000); // try for 0.05% bytes wasted between output segments
while (true) {
op.reset_stats();
if (split_point >= input_chunk.end_addr()) {
__.dprint(format("\nnew split point of 0x%04x is past end of input.", split_point));
split_point = data_start;
if (input_chunk.end_addr() < data_start) {
final var lCuC = input_chunk.split_at(split_point);
final var lower_chunk = lCuC[0];
final var upper_chunk = lCuC[1];
chunks = List.of( //
upper_chunk, //
new CDataChunk(input_chunk.end_addr(), op.crunch(lower_chunk, false), lower_chunk.ext()) //
);
} else {
__.dprint("Just storing raw input file after header");
chunks = List.of( //
input_chunk//
);
}
__.disp_chunks(chunks);
break;
}
final var lCuC = input_chunk.split_at(split_point);
final var lower_chunk = lCuC[0];
final var upper_chunk = lCuC[1];
chunks = List.of( //
new CDataChunk(data_start, op.crunch(upper_chunk, false), upper_chunk.ext()), //
CDataChunk.ending_at(split_point, op.crunch(lower_chunk, true), lower_chunk.ext()) // in place => safe
);
final var gap = chunks.get(1).addr - chunks.get(0).end_addr();
if (gap < 0) {
var adjustment = -gap;
adjustment -= floorDiv(adjustment, 5); // reduce larger steps a little
__.disp_chunks(chunks);
__.dprint(format("segment overlap = %d", -gap));
__.dprint(format("shifting split up by %d bytes and recrunching", adjustment));
split_point += adjustment;
continue;
}
__.disp_chunks(chunks);
__.dprint(format("segment gap = %d (max=%d)", gap, max_gap));
if (gap > max_gap) {
final var adjustment = gap - floorDiv(gap, 4);
__.dprint(format("shifting split down by %d bytes and recrunching", adjustment));
split_point -= adjustment;
max_gap += 1 + max_gap; // increase tolerance to escape oscillation.
} else {
/*
* ok. At this point,
* gap >= 0
* chunks[1].addr-chunks[0].end_addr() >= 0
* chunks[1].addr >= chunks[0].end_addr()
* chunks[1].end_addr() >= chunks[0].end_addr() (as c1.end_addr>=c1.addr)
* split_point >= chunks[0].end_addr()
* upper_chunk.addr >= chunks[0].end_addr()
* therefore, chunk 0 is safe.
*/
__.dprint(gap > 0 ? "close enough." : "perfect.");
break;
}
}
}
op.stats.log_boot(output_chunk.data.size());
for (final ByteObjectPair<CDataChunk> chunkOffset : patch_offsets.zip(chunks.subList(0, min(2, chunks.size())))) { // chunks may hold a single entry; a fixed subList(0, 2) would throw
final var offset = chunkOffset.getOne();
final var chunk = chunkOffset.getTwo();
final var gap = chunk.addr - output_chunk.end_addr();
if (gap > 0) {
op.stats.log_gap(gap);
output_chunk.data.addAll(ByteArrayList.newWithNValues(gap, (byte) 0xff));
}
if (chunk.se != null) {
output_chunk.data.set(offset + 1, lo(chunk.addr)); // patch the segment start address into the boot code:
output_chunk.data.set(offset + 3, hi(chunk.addr)); // low and high byte of chunk.addr
output_chunk.data.set(offset + 4, (byte) 0x20); // replace LDA decrunch with JSR decrunch
} else {
op.stats.log_raw(chunk.data.size());
}
output_chunk.extend(chunk);
}
final var exec_addr = args.getInt("execAddr");
output_chunk.data.set(oStart + 1, lo(exec_addr));
output_chunk.data.set(oStart + 2, hi(exec_addr));
return output_chunk;
}
public static void main(final String[] argsv) throws ArgumentParserException, IOException {
try {
final var args = parse_args(argsv);
final var op = new CCruncher(args.getBoolean("fast"));
if (args.getBoolean("raw")) {
if (args.<File> get("infile").getName().endsWith(".prg")) {
err.println("warning, input file will be parsed as a .bin");
}
if (args.<File> get("outfile").getName().endsWith(".prg")) {
err.println("warning, output file will be written as a .bin");
}
final var input_data = ByteLists.mutable.of(Files.readAllBytes(args.<File> get("infile").toPath()));
final var original_length = input_data.size();
out.println(format("%05d bytes read from %s", //
original_length, args.<File> get("infile").getName()));
final var output_bytes = raw_crunch(args, op, input_data);
if (args.getBoolean("verbose")) {
op.report(true);
}
Files.write(args.<File> get("outfile").toPath(), output_bytes.toArray(), WRITE, CREATE);
final var compressed_length = output_bytes.size();
out.println(format("% 5d bytes written to %s (%4.1f%% of original size)", //
compressed_length, args.<File> get("outfile").getName(), //
compressed_length * 100.0 / original_length) //
);
} else {
if (args.<File> get("infile").getName().endsWith(".bin")) {
err.println("warning, input file will be parsed as a .prg");
}
if (args.<File> get("outfile").getName().endsWith(".bin")) {
err.println("warning, output file will be written as a .prg");
}
final var input_chunk = load_prg(args.<File> get("infile"));
final var original_length = input_chunk.data.size() + 2;
out.println(format("%s: %05d bytes read from %s", //
input_chunk.ext(), original_length, args.<File> get("infile").getName()));
if (input_chunk.data.size() < 1) {
err.println("Input file can't be empty");
exit(1);
}
CDataChunk output_chunk;
if (args.getBoolean("selfExtracting")) {
if (input_chunk.addr < 0x0200) {
out.println(input_chunk.addr);
err.println("Destination addresses below 0x0200 not supported by -x");
exit(1);
}
output_chunk = sfx_crunch(args, op, input_chunk);
} else {
output_chunk = level_crunch(args, op, input_chunk);
}
op.stats.log_load_addr();
op.stats.log_save_addr();
if (args.getBoolean("verbose")) {
op.report();
}
save_prg(args.<File> get("outfile"), output_chunk);
final var compressed_length = output_chunk.data.size() + 2;
out.println(format("%s: %05d bytes written to %s (%04.1f%% of original size)", //
output_chunk.ext(), compressed_length, args.<File> get("outfile").getName(), //
compressed_length * 100.0 / original_length));
}
} catch (final ArgumentParserException e) {
e.getParser().printHelp();
}
}
}
// Utils.java (excerpt)
package de.plush.brix.c64graphics.core.util;
public class Utils {
// ...
public static String toHexString(final byte b) {
return format("%2s", Integer.toHexString(Byte.toUnsignedInt(b))).replace(' ', '0');
}
/** In Eclipse Collections 10.4.0 {@link ByteArrayList#subList(int, int)} is still not implemented. */
public static MutableByteList subList(final MutableByteList in, final int from, final int to) {
return ByteLists.mutable.of(copyOfRange(in.toArray(), from, to));
}
/** In Eclipse Collections 10.4.0 {@link IntArrayList#subList(int, int)} is still not implemented. */
public static MutableIntList subList(final MutableIntList in, final int from, final int to) {
return IntLists.mutable.of(copyOfRange(in.toArray(), from, to));
}
}

Brixomatic commented Aug 25, 2020

		<dependency>
			<groupId>org.eclipse.collections</groupId>
			<artifactId>eclipse-collections-api</artifactId>
			<version>10.4.0</version>
		</dependency>
		<dependency>
			<groupId>org.eclipse.collections</groupId>
			<artifactId>eclipse-collections</artifactId>
			<version>10.4.0</version>
		</dependency>
		<dependency>
			<groupId>net.sourceforge.argparse4j</groupId>
			<artifactId>argparse4j</artifactId>
			<version>0.8.1</version>
		</dependency>

@Brixomatic

fixed: missing arguments didn't print help message.

@Brixomatic

fixed: visibility of factory methods package -> public

@Brixomatic

fixed: some remainder calculation was wrong.
