Created
October 22, 2014 15:37
-
-
Save arturopala/84e0be44bbe95208e4d8 to your computer and use it in GitHub Desktop.
Scala LZW compress and decompress
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.mutable.{ ListBuffer, HashMap } | |
object LZW { | |
private val initialDictionaryCompress: Seq[(String, Int)] = (0 until 256) map (i => ("" + i.toChar, i)) | |
private val initialDictionaryDecompress: Seq[(Int, String)] = (0 until 256) map (i => (i, "" + i.toChar)) | |
def compress(uncompressed: String): Seq[Int] = { | |
val dictionary = HashMap(initialDictionaryCompress: _*) | |
val result = new ListBuffer[Int]() | |
var wc = "" | |
var w = "" | |
uncompressed.foreach(c => { | |
wc = w + c | |
dictionary.get(wc) match { | |
case Some(_) => { | |
w = wc | |
} | |
case None => { | |
dictionary.get(w) match { | |
case Some(entry) => { | |
result += entry | |
dictionary.put(wc, dictionary.size) | |
w = String.valueOf(c) | |
} | |
case None => throw new IllegalArgumentException(s"Character out of range: $c"); | |
} | |
} | |
} | |
}) | |
if (w != "") result += dictionary(w) | |
result.toSeq | |
} | |
def decompress(compressed: Seq[Int]): String = { | |
val dictionary = HashMap(initialDictionaryDecompress: _*) | |
var w = "" + compressed.head.asInstanceOf[Char] | |
val result = new StringBuilder(w); | |
compressed.tail.foreach(k => { | |
val e = dictionary.get(k) match { | |
case Some(entry) => entry | |
case None if (k == dictionary.size) => w + w.charAt(0) | |
case _ => throw new IllegalArgumentException("Bad compressed k: " + k); | |
} | |
result.append(e) | |
dictionary.put(dictionary.size, (w + e.charAt(0))) | |
w = e | |
}) | |
result.toString() | |
} | |
} | |
import org.junit.runner._ | |
import org.specs2.execute.AsResult | |
import org.specs2.mutable._ | |
import org.specs2.runner._ | |
@RunWith(classOf[JUnitRunner]) | |
class LZWSpec extends Specification with TestData { | |
"LZW" should { | |
val text = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed consequat luctus nunc nec luctus. Aliquam tempor egestas ipsum, mollis suscipit sem molestie eu. Praesent ultricies viverra libero, sed blandit velit aliquam sit amet. Duis pulvinar est et risus ornare auctor. Vivamus nibh ex, consectetur hendrerit tempus in, volutpat nec ante. Fusce egestas eget tortor nec semper. Phasellus vehicula orci et diam dapibus, et malesuada felis laoreet. Etiam rutrum porta ante sit amet molestie. Cras semper dolor sed lacus fringilla pulvinar. Aliquam at commodo est, id vehicula turpis. Mauris consectetur congue mauris non eleifend. Donec convallis luctus odio sed fermentum. In.""" | |
val compressed = Seq(76, 111, 114, 101, 109, 32, 105, 112, 115, 117, 260, 100, 111, 108, 257, 32, 115, 105, 116, 32, 97, 109, 101, 116, 44, 32, 99, 111, 110, 115, 101, 99, 116, 278, 117, 114, 275, 100, 262, 105, 115, 99, 105, 110, 103, 32, 101, 108, 273, 46, 32, 83, 101, 100, 281, 283, 285, 113, 117, 97, 274, 108, 117, 287, 117, 115, 32, 110, 117, 110, 99, 322, 286, 32, 317, 319, 115, 305, 65, 303, 313, 276, 32, 288, 109, 112, 270, 101, 103, 101, 115, 116, 97, 321, 262, 264, 109, 280, 109, 268, 303, 321, 264, 296, 294, 274, 285, 260, 354, 108, 345, 116, 105, 101, 301, 117, 305, 80, 114, 97, 345, 101, 110, 274, 117, 108, 116, 114, 105, 297, 345, 32, 118, 105, 118, 101, 114, 374, 329, 105, 98, 391, 111, 280, 285, 309, 98, 108, 97, 110, 293, 274, 390, 303, 274, 97, 335, 314, 260, 272, 410, 277, 116, 305, 68, 117, 295, 32, 112, 380, 388, 110, 97, 291, 366, 301, 274, 383, 358, 32, 257, 427, 258, 275, 318, 116, 257, 305, 86, 389, 276, 320, 322, 395, 104, 301, 120, 280, 282, 284, 286, 288, 116, 290, 32, 104, 377, 100, 258, 383, 274, 339, 424, 349, 110, 280, 118, 268, 117, 116, 112, 315, 327, 326, 404, 288, 305, 70, 320, 99, 369, 343, 366, 348, 301, 344, 466, 257, 441, 291, 110, 328, 362, 112, 391, 372, 104, 348, 302, 317, 321, 390, 104, 384, 380, 97, 435, 114, 297, 431, 32, 293, 337, 100, 97, 112, 395, 320, 280, 278, 32, 109, 411, 345, 314, 519, 32, 102, 302, 422, 403, 257, 101, 278, 305, 69, 367, 337, 114, 474, 544, 260, 341, 114, 347, 275, 378, 369, 415, 275, 417, 526, 268, 366, 368, 305, 67, 374, 357, 259, 499, 291, 267, 269, 291, 400, 329, 97, 99, 447, 102, 383, 299, 105, 108, 403, 423, 425, 298, 428, 333, 412, 337, 477, 282, 109, 354, 267, 301, 346, 280, 105, 309, 507, 509, 581, 458, 114, 521, 332, 32, 77, 97, 290, 422, 454, 285, 287, 289, 291, 454, 103, 117, 369, 527, 609, 321, 110, 283, 301, 365, 105, 533, 405, 419, 283, 328, 454, 118, 411, 356, 329, 440, 447, 111, 293, 111, 271, 308, 532, 391, 277, 378, 265, 305, 73, 110, 46) | |
"decompress sequence of ints" in { | |
val result = LZW.decompress(compressed) | |
result must beEqualTo(text) | |
} | |
"compress text" in { | |
val result = LZW.compress(text) | |
result must beEqualTo(compressed) | |
} | |
"compress and decompress same text" in { | |
val text = SOME_RAW_TEXT | |
val result1 = LZW.compress(text) | |
val result2 = LZW.decompress(result1) | |
result2 must beEqualTo(text) | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment