Created
May 25, 2013 06:18
-
-
Save takawitter/5648124 to your computer and use it in GitHub Desktop.
とりあえずLZ77.javaを改造してLZEndを実装。まだ完備辞書使ってないのでちゃんとしたLZEndではないし、圧縮率もめちゃ悪い(というかたいてい増える)。あと構築方法もLZEndのものとは異なる。でもLZEndの特徴であるランダムアクセスは実現できてるし、そのおかげで展開時に展開済み文字列にアクセスしなくても良いようになってる。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class LZEnd { | |
static class Phrase{ | |
public Phrase() { | |
} | |
public Phrase(int phraseNum, int ref, int len, char nonMatchedChar) { | |
this.phraseNum = phraseNum; | |
this.ref = ref; | |
this.len = len; | |
this.nonMatchedChar = nonMatchedChar; | |
} | |
public int getPhraseNum() { | |
return phraseNum; | |
} | |
public void setPhraseNum(int phraseNum) { | |
this.phraseNum = phraseNum; | |
} | |
public int getRef() { | |
return ref; | |
} | |
public void setRef(int ref) { | |
this.ref = ref; | |
} | |
public int getLen() { | |
return len; | |
} | |
public void setLen(int len) { | |
this.len = len; | |
} | |
public char getNonMatchedChar() { | |
return nonMatchedChar; | |
} | |
public void setNonMatchedChar(char nonMatchedChar) { | |
this.nonMatchedChar = nonMatchedChar; | |
} | |
private int phraseNum; | |
private int ref; | |
private int len; | |
private char nonMatchedChar; | |
} | |
private static void compress(CharSequence src, Appendable out, int windowSize) | |
throws IOException{ | |
int phraseCount = 0; | |
Map<Character, List<Phrase>> charToPhrases = new HashMap<Character, List<Phrase>>(); | |
Map<Integer, Phrase> numToPhrase = new HashMap<Integer, Phrase>(); | |
int n = src.length(); | |
for(int i = 0; i < n; i++){ | |
char target = src.charAt(i); | |
// find longest match | |
Phrase matchedPhrase = null; | |
char nonMatchChar = 0xff; | |
List<Phrase> phrasesForTarget = charToPhrases.get(target); | |
if(phrasesForTarget != null){ | |
Iterator<Phrase> it = phrasesForTarget.iterator(); | |
while(it.hasNext()){ | |
Phrase p = it.next(); | |
if((phraseCount - p.getPhraseNum()) > windowSize){ | |
numToPhrase.remove(p.getPhraseNum()); | |
it.remove(); | |
continue; | |
} | |
if(matched(src, i, numToPhrase, p)){ | |
if(matchedPhrase != null && matchedPhrase.getLen() > p.getLen()) continue; | |
matchedPhrase = p; | |
nonMatchChar = (char)0xff; | |
if((i + p.getLen() + 1) < n){ | |
nonMatchChar = src.charAt(i + p.getLen() + 1); | |
} | |
} | |
} | |
} | |
Phrase newPhrase; | |
if(matchedPhrase != null){ | |
newPhrase = new Phrase( | |
phraseCount, matchedPhrase.getPhraseNum(), | |
matchedPhrase.getLen() + 1, nonMatchChar); | |
out.append((char)(phraseCount - matchedPhrase.getPhraseNum())) | |
.append((char)newPhrase.getLen()) | |
.append(nonMatchChar); | |
} else{ | |
newPhrase = new Phrase(phraseCount, -1, 0, target); | |
out.append((char)0x00).append((char)0x00).append(target); | |
phrasesForTarget = new LinkedList<Phrase>(); | |
charToPhrases.put(target, phrasesForTarget); | |
} | |
phrasesForTarget.add(newPhrase); | |
numToPhrase.put(phraseCount++, newPhrase); | |
i += newPhrase.getLen(); | |
} | |
} | |
private static boolean matched(CharSequence src, int pos, Map<Integer, Phrase> numToPhrase, Phrase phrase){ | |
int dstPos = pos + phrase.getLen(); | |
if(src.length() <= dstPos) return false; | |
if(src.charAt(dstPos) != phrase.getNonMatchedChar()) return false; | |
if(phrase.getLen() == 0) return true; | |
int ref = phrase.getRef(); | |
Phrase nextPhrase = numToPhrase.get(ref); | |
if(nextPhrase == null) return false; | |
return matched(src, pos, numToPhrase, nextPhrase); | |
} | |
public static void decompress(CharSequence src, StringBuilder out, int windowSize){ | |
Map<Integer, Phrase> phrases = new HashMap<Integer, Phrase>(); | |
int n = src.length(); | |
for(int i = 0; i < n; i += 3){ | |
int phraseNum = i / 3; | |
int start = src.charAt(i); | |
int matchedLen = src.charAt(i + 1); | |
char nonMatchChar = src.charAt(i + 2); | |
if(start != 0){ | |
decodePhrase(phrases, phraseNum - start, out); | |
phrases.put(phraseNum, new Phrase(phraseNum, phraseNum - start, matchedLen, nonMatchChar)); | |
} else{ | |
phrases.put(phraseNum, new Phrase(phraseNum, 0, 0, nonMatchChar)); | |
} | |
if(nonMatchChar != 0xff){ | |
out.append(nonMatchChar); | |
} | |
int oldest = phraseNum - windowSize; | |
if(oldest > 0) phrases.remove(oldest); | |
} | |
} | |
private static void decodePhrase(Map<Integer, Phrase> phrases, int phraseNum, StringBuilder out){ | |
Phrase p = phrases.get(phraseNum); | |
if(p.getLen() > 0){ | |
decodePhrase(phrases, p.getRef(), out); | |
} | |
out.append(p.getNonMatchedChar()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment