Skip to content

Instantly share code, notes, and snippets.

@pjt33
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pjt33/9094143 to your computer and use it in GitHub Desktop.
Save pjt33/9094143 to your computer and use it in GitHub Desktop.
GolfScript golfer
import java.io.*;
import java.util.*;
/**
 * Command-line minifier ("golfer") for GolfScript source files.
 *
 * <p>Given {@code input.gs} it writes {@code input.min.gs} containing the minified program
 * followed by a comment trailer with the byte count and a hex dump of the minified code.
 * Minification strips comments, removes whitespace that is not needed to keep two tokens
 * apart, and re-encodes string literals with raw characters instead of escapes.
 */
public class GolfScriptGolfer {
    /** One char per byte, so {@code String.length()} of the golfed code is its size in bytes. */
    private static final String CHARSET = "ISO-8859-1";

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the GolfScript file to minify
     * @throws Exception if the input cannot be read, parsed, or the output cannot be written
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: java GolfScriptGolfer input.gs");
            return;
        }
        String inputFile = args[0];
        String golfed = golf(readFully(inputFile));
        String baseName = inputFile.endsWith(".gs")
                ? inputFile.substring(0, inputFile.length() - 3)
                : inputFile;

        StringBuilder sb = new StringBuilder();
        sb.append(golfed);
        sb.append("\n\n# ");
        sb.append(golfed.length());
        sb.append(" bytes\n\n# xxd:\n# ");
        // Prefix every dump line with "# " so the trailer is a GolfScript comment.
        sb.append(xxd(golfed).replace("\n", "\n# "));
        write(baseName + ".min.gs", sb.toString());
    }

    /**
     * Minifies GolfScript source text.
     *
     * <p>Package-private (rather than private) so the pure text-to-text pipeline can be
     * exercised without touching the file system.
     *
     * @param src the full source text
     * @return the minified source
     * @throws IllegalArgumentException if a string literal or a {@code ##} block is unclosed,
     *         or a numeric string escape is malformed
     */
    static String golf(String src) {
        StringBuilder sb = new StringBuilder();
        for (Token tok : parse(src)) {
            if (sb.length() > 0) {
                // The separator decision depends on the last character emitted so far.
                sb.append(tok.separator(sb.charAt(sb.length() - 1)));
            }
            sb.append(tok.golfed());
        }
        return sb.toString();
    }

    /**
     * Splits the source into string literals, comments, and "generic" runs of everything else.
     *
     * @throws IllegalArgumentException if a string literal or a {@code ##} block is unclosed
     */
    private static List<Token> parse(String src) {
        List<Token> tokens = new ArrayList<Token>();
        int pos = 0;
        while (pos < src.length()) {
            // Everything until the first " ' or # is a generic token.
            int special = Math.min(indexOfOrLen(src, '"', pos),
                    Math.min(indexOfOrLen(src, '\'', pos), indexOfOrLen(src, '#', pos)));
            if (special > pos) {
                tokens.add(new Generic(src.substring(pos, special)));
                pos = special;
            }
            if (pos == src.length()) {
                break;
            }
            // By construction the char at pos is one of " ' #.
            if (src.charAt(pos) == '#') {
                int end = findCommentEnd(src, pos);
                tokens.add(new Comment(src.substring(pos, end)));
                pos = end + 1; // Skip the terminating newline as well.
            } else {
                int close = findClosingQuote(src, pos);
                tokens.add(new GString(src.substring(pos, close + 1)));
                pos = close + 1;
            }
        }
        return tokens;
    }

    /**
     * Finds the delimiter closing the string literal that opens at {@code open}.
     *
     * <p>In a "-string any \ escapes the following char; in a '-string the only escapes are
     * \\ and \'. Either way it is enough to look for an unescaped copy of the opening char.
     *
     * @return index of the closing delimiter
     * @throws IllegalArgumentException if the source ends before the literal is closed
     */
    private static int findClosingQuote(String src, int open) {
        char delim = src.charAt(open);
        int i = open + 1;
        while (i < src.length() && src.charAt(i) != delim) {
            i += src.charAt(i) == '\\' ? 2 : 1;
        }
        // ">=" rather than "==": a trailing backslash steps one past the end of the source.
        if (i >= src.length()) {
            throw new IllegalArgumentException("Unclosed string");
        }
        return i;
    }

    /**
     * Finds the end of the comment starting at {@code hash}.
     *
     * <p>A plain {@code #} comment runs to the end of its line. A comment starting with
     * {@code ##} opens a block which runs up to the end of the line holding the next
     * {@code ##} found after the opening line.
     *
     * @return index of the newline ending the comment, or {@code src.length()} if there is none
     * @throws IllegalArgumentException if a {@code ##} block has no closing {@code ##}
     */
    private static int findCommentEnd(String src, int hash) {
        int searchFrom = hash + 1;
        if (hash + 1 < src.length() && src.charAt(hash + 1) == '#') {
            int newlineIdx = src.indexOf('\n', hash + 2);
            if (newlineIdx < 0) {
                throw new IllegalArgumentException("Unclosed ## block");
            }
            int closeIdx = src.indexOf("##", newlineIdx + 1);
            if (closeIdx < 0) {
                throw new IllegalArgumentException("Unclosed ## block");
            }
            searchFrom = closeIdx + 2;
        }
        return indexOfOrLen(src, '\n', searchFrom);
    }

    /** Like {@link String#indexOf(int, int)}, but returns the string length when not found. */
    private static int indexOfOrLen(String str, char ch, int off) {
        int idx = str.indexOf(ch, off);
        return idx == -1 ? str.length() : idx;
    }

    /**
     * Renders a hex dump of {@code str}, 16 chars per line: a 7-digit hex offset, the char
     * codes in hex grouped in pairs, then the printable-ASCII rendering ('.' for the rest).
     */
    private static String xxd(String str) {
        StringBuilder out = new StringBuilder();
        int len = str.length();
        for (int off = 0; off < len; off += 16) {
            out.append(String.format("%07x:", off));
            for (int i = 0; i < 16; i++) {
                if ((i & 1) == 0) {
                    out.append(' ');
                }
                if (off + i < len) {
                    out.append(String.format("%02x", (int) str.charAt(off + i)));
                } else {
                    // Two blanks per missing byte keep the text column aligned on a short row.
                    out.append("  ");
                }
            }
            out.append(" ");
            for (int i = 0; i < 16 && off + i < len; i++) {
                char ch = str.charAt(off + i);
                out.append((ch >= ' ' && ch <= '~') ? ch : '.');
            }
            out.append("\n");
        }
        return out.toString();
    }

    /** Reads the whole file as ISO-8859-1 text, closing the stream even if a read fails. */
    private static String readFully(String filename) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (FileInputStream fis = new FileInputStream(filename);
                Reader reader = new InputStreamReader(fis, CHARSET)) {
            char[] buf = new char[4096];
            int len;
            while ((len = reader.read(buf)) != -1) {
                sb.append(buf, 0, len);
            }
        }
        return sb.toString();
    }

    /** Writes the text as ISO-8859-1 bytes, closing the stream even if the write fails. */
    private static void write(String filename, String contents) throws IOException {
        byte[] bytes = contents.getBytes(CHARSET);
        try (FileOutputStream fos = new FileOutputStream(filename)) {
            fos.write(bytes);
        }
    }

    /** A lexical chunk of the source together with the rule for minifying it. */
    abstract static class Token {
        /** The exact source text of this token. */
        protected final String raw;

        protected Token(String raw) {
            this.raw = raw;
        }

        /**
         * If this token needs separating from the previous one, returns a separator.
         *
         * @param prev the last character emitted before this token
         * @return the text to insert before this token's golfed form; empty by default
         */
        public String separator(char prev) {
            return "";
        }

        /** Returns the minified form of this token. */
        public abstract String golfed();

        @Override
        public String toString() {
            return raw;
        }
    }

    /** Any run of source that is neither a string literal nor a comment. */
    static class Generic extends Token {
        public Generic(String generic) {
            super(generic);
        }

        /**
         * Drops all whitespace except a single space wherever the characters on either side
         * would otherwise run together into one token. Leading and trailing whitespace is
         * dropped here; {@link #separator(char)} handles the boundary with the previous token.
         */
        @Override
        public String golfed() {
            StringBuilder sb = new StringBuilder();
            // '#' never occurs inside a generic token and is neither whitespace nor
            // alphanumeric, so it doubles as the "no pending whitespace" marker.
            char beforeWS = '#';
            char prev = '#';
            for (char ch : raw.toCharArray()) {
                if (Character.isWhitespace(ch)) {
                    beforeWS = prev;
                    continue;
                }
                if (beforeWS != '#' && needSeparator(beforeWS, ch)) {
                    sb.append(' ');
                }
                beforeWS = '#';
                prev = ch;
                sb.append(ch);
            }
            return sb.toString();
        }

        @Override
        public String separator(char prev) {
            // Decide based on our first non-whitespace char.
            for (char ch : raw.toCharArray()) {
                if (!Character.isWhitespace(ch)) {
                    return needSeparator(prev, ch) ? " " : "";
                }
            }
            // Whitespace only: this token golfs to nothing, so it needs no separator.
            return "";
        }

        /** Returns whether {@code before} directly followed by {@code after} would fuse into one token. */
        private static boolean needSeparator(char before, char after) {
            // [:alpha:_][:alnum:] is one token
            if ((Character.isAlphabetic(before) || before == '_')
                    && (Character.isLetterOrDigit(after) || after == '_')) {
                return true;
            }
            // [:digit:-][:digit:] is one token
            return (Character.isDigit(before) || before == '-') && Character.isDigit(after);
        }
    }

    /** A string literal, including its delimiters. */
    static class GString extends Token {
        public GString(String delimited) {
            super(delimited);
        }

        /**
         * Expands the escapes in the literal, then re-encodes the value with whichever
         * delimiter needs fewer escapes; only the delimiter and backslash are escaped.
         *
         * @throws IllegalArgumentException if a numeric escape has too few digits
         */
        @Override
        public String golfed() {
            String literal = unescape();
            char delim = chooseDelimiter(literal);
            StringBuilder output = new StringBuilder(literal.length() + 2);
            output.append(delim);
            for (char ch : literal.toCharArray()) {
                if (ch == delim || ch == '\\') {
                    output.append('\\');
                }
                output.append(ch);
            }
            output.append(delim);
            return output.toString();
        }

        /** Returns the string value denoted by {@code raw}, with all escapes expanded. */
        private String unescape() {
            StringBuilder value = new StringBuilder();
            boolean singleQuoted = raw.charAt(0) == '\'';
            int end = raw.length() - 1; // Index of the closing delimiter.
            int off = 1;
            while (off < end) {
                char ch = raw.charAt(off);
                if (ch != '\\') {
                    value.append(ch);
                    off++;
                } else if (singleQuoted) {
                    char esc = raw.charAt(off + 1);
                    if (esc == '\'' || esc == '\\') {
                        value.append(esc);
                        off += 2;
                    } else {
                        // Not an escape in a '-string: the backslash is literal.
                        value.append('\\');
                        off++;
                    }
                } else {
                    off = appendDoubleQuotedEscape(value, off);
                }
            }
            return value.toString();
        }

        /**
         * Expands the double-quoted escape whose backslash is at {@code off}.
         *
         * @return index of the first char after the escape
         */
        private int appendDoubleQuotedEscape(StringBuilder value, int off) {
            char esc = raw.charAt(off + 1);
            switch (esc) {
                case 'n': value.append('\n'); break;
                case 's': value.append(' '); break;
                case 'r': value.append('\r'); break;
                case 't': value.append('\t'); break;
                case 'v': value.append('\u000b'); break;
                case 'f': value.append('\f'); break;
                case 'b': value.append('\b'); break;
                case 'a': value.append('\u0007'); break;
                case 'e': value.append('\u001b'); break;
                case 'x':
                    // One or two hex digits.
                    return appendNumericEscape(value, off + 2, 1, 2, 16);
                case 'u':
                    // Exactly four hex digits.
                    return appendNumericEscape(value, off + 2, 4, 4, 16);
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    // One to three octal digits; the char after the backslash is the first.
                    return appendNumericEscape(value, off + 1, 1, 3, 8);
                // Ignore [cCM]: I won't use those in source
                default:
                    value.append(esc);
                    break;
            }
            return off + 2;
        }

        /**
         * Decodes the run of digits starting at {@code start} and appends the resulting char.
         * Scanning stops at the first non-digit, after {@code maxDigits} digits, or at the
         * closing delimiter, whichever comes first.
         *
         * @return index of the first char after the digits consumed
         * @throws IllegalArgumentException if fewer than {@code minDigits} digits are present
         */
        private int appendNumericEscape(StringBuilder value, int start, int minDigits,
                int maxDigits, int radix) {
            int limit = Math.min(start + maxDigits, raw.length() - 1);
            int code = 0;
            int pos = start;
            while (pos < limit) {
                int digit = Character.digit(raw.charAt(pos), radix);
                if (digit < 0) {
                    break;
                }
                code = code * radix + digit;
                pos++;
            }
            if (pos - start < minDigits) {
                throw new IllegalArgumentException("Malformed numeric escape in string " + raw);
            }
            value.append((char) code);
            return pos;
        }

        /** Picks the delimiter for the re-encoded literal. */
        private char chooseDelimiter(String literal) {
            // "#{..." must be double-quoted, and '#{...' must be single-quoted because of
            // string interpolation.
            // NOTE(review): an escaped "\#{" in a double-quoted source looks the same as a real
            // interpolation once unescaped and is emitted unescaped - confirm whether that matters.
            if (literal.contains("#{")) {
                return raw.charAt(0);
            }
            // Otherwise use the delimiter that occurs less often in the value.
            int sgl = 0;
            int dbl = 0;
            for (char ch : literal.toCharArray()) {
                if (ch == '\'') {
                    sgl++;
                }
                if (ch == '"') {
                    dbl++;
                }
            }
            return sgl <= dbl ? '\'' : '"';
        }
    }

    /** A comment; it contributes nothing to the golfed output. */
    static class Comment extends Token {
        public Comment(String comment) {
            super(comment);
        }

        @Override
        public String golfed() {
            return "";
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment