Skip to content

Instantly share code, notes, and snippets.

@arturopala
Created October 22, 2014 15:37
Show Gist options
  • Save arturopala/84e0be44bbe95208e4d8 to your computer and use it in GitHub Desktop.
Save arturopala/84e0be44bbe95208e4d8 to your computer and use it in GitHub Desktop.
Scala LZW compress and decompress
import scala.collection.mutable.{ ListBuffer, HashMap }
object LZW {
private val initialDictionaryCompress: Seq[(String, Int)] = (0 until 256) map (i => ("" + i.toChar, i))
private val initialDictionaryDecompress: Seq[(Int, String)] = (0 until 256) map (i => (i, "" + i.toChar))
def compress(uncompressed: String): Seq[Int] = {
val dictionary = HashMap(initialDictionaryCompress: _*)
val result = new ListBuffer[Int]()
var wc = ""
var w = ""
uncompressed.foreach(c => {
wc = w + c
dictionary.get(wc) match {
case Some(_) => {
w = wc
}
case None => {
dictionary.get(w) match {
case Some(entry) => {
result += entry
dictionary.put(wc, dictionary.size)
w = String.valueOf(c)
}
case None => throw new IllegalArgumentException(s"Character out of range: $c");
}
}
}
})
if (w != "") result += dictionary(w)
result.toSeq
}
def decompress(compressed: Seq[Int]): String = {
val dictionary = HashMap(initialDictionaryDecompress: _*)
var w = "" + compressed.head.asInstanceOf[Char]
val result = new StringBuilder(w);
compressed.tail.foreach(k => {
val e = dictionary.get(k) match {
case Some(entry) => entry
case None if (k == dictionary.size) => w + w.charAt(0)
case _ => throw new IllegalArgumentException("Bad compressed k: " + k);
}
result.append(e)
dictionary.put(dictionary.size, (w + e.charAt(0)))
w = e
})
result.toString()
}
}
import org.junit.runner._
import org.specs2.execute.AsResult
import org.specs2.mutable._
import org.specs2.runner._
@RunWith(classOf[JUnitRunner])
class LZWSpec extends Specification with TestData {
"LZW" should {
val text = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed consequat luctus nunc nec luctus. Aliquam tempor egestas ipsum, mollis suscipit sem molestie eu. Praesent ultricies viverra libero, sed blandit velit aliquam sit amet. Duis pulvinar est et risus ornare auctor. Vivamus nibh ex, consectetur hendrerit tempus in, volutpat nec ante. Fusce egestas eget tortor nec semper. Phasellus vehicula orci et diam dapibus, et malesuada felis laoreet. Etiam rutrum porta ante sit amet molestie. Cras semper dolor sed lacus fringilla pulvinar. Aliquam at commodo est, id vehicula turpis. Mauris consectetur congue mauris non eleifend. Donec convallis luctus odio sed fermentum. In."""
val compressed = Seq(76, 111, 114, 101, 109, 32, 105, 112, 115, 117, 260, 100, 111, 108, 257, 32, 115, 105, 116, 32, 97, 109, 101, 116, 44, 32, 99, 111, 110, 115, 101, 99, 116, 278, 117, 114, 275, 100, 262, 105, 115, 99, 105, 110, 103, 32, 101, 108, 273, 46, 32, 83, 101, 100, 281, 283, 285, 113, 117, 97, 274, 108, 117, 287, 117, 115, 32, 110, 117, 110, 99, 322, 286, 32, 317, 319, 115, 305, 65, 303, 313, 276, 32, 288, 109, 112, 270, 101, 103, 101, 115, 116, 97, 321, 262, 264, 109, 280, 109, 268, 303, 321, 264, 296, 294, 274, 285, 260, 354, 108, 345, 116, 105, 101, 301, 117, 305, 80, 114, 97, 345, 101, 110, 274, 117, 108, 116, 114, 105, 297, 345, 32, 118, 105, 118, 101, 114, 374, 329, 105, 98, 391, 111, 280, 285, 309, 98, 108, 97, 110, 293, 274, 390, 303, 274, 97, 335, 314, 260, 272, 410, 277, 116, 305, 68, 117, 295, 32, 112, 380, 388, 110, 97, 291, 366, 301, 274, 383, 358, 32, 257, 427, 258, 275, 318, 116, 257, 305, 86, 389, 276, 320, 322, 395, 104, 301, 120, 280, 282, 284, 286, 288, 116, 290, 32, 104, 377, 100, 258, 383, 274, 339, 424, 349, 110, 280, 118, 268, 117, 116, 112, 315, 327, 326, 404, 288, 305, 70, 320, 99, 369, 343, 366, 348, 301, 344, 466, 257, 441, 291, 110, 328, 362, 112, 391, 372, 104, 348, 302, 317, 321, 390, 104, 384, 380, 97, 435, 114, 297, 431, 32, 293, 337, 100, 97, 112, 395, 320, 280, 278, 32, 109, 411, 345, 314, 519, 32, 102, 302, 422, 403, 257, 101, 278, 305, 69, 367, 337, 114, 474, 544, 260, 341, 114, 347, 275, 378, 369, 415, 275, 417, 526, 268, 366, 368, 305, 67, 374, 357, 259, 499, 291, 267, 269, 291, 400, 329, 97, 99, 447, 102, 383, 299, 105, 108, 403, 423, 425, 298, 428, 333, 412, 337, 477, 282, 109, 354, 267, 301, 346, 280, 105, 309, 507, 509, 581, 458, 114, 521, 332, 32, 77, 97, 290, 422, 454, 285, 287, 289, 291, 454, 103, 117, 369, 527, 609, 321, 110, 283, 301, 365, 105, 533, 405, 419, 283, 328, 454, 118, 411, 356, 329, 440, 447, 111, 293, 111, 271, 308, 532, 391, 277, 378, 265, 305, 73, 110, 46)
"decompress sequence of ints" in {
val result = LZW.decompress(compressed)
result must beEqualTo(text)
}
"compress text" in {
val result = LZW.compress(text)
result must beEqualTo(compressed)
}
"compress and decompress same text" in {
val text = SOME_RAW_TEXT
val result1 = LZW.compress(text)
val result2 = LZW.decompress(result1)
result2 must beEqualTo(text)
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment