Skip to content

Instantly share code, notes, and snippets.

@manuzhang
Created October 26, 2021 09:25
Show Gist options
  • Save manuzhang/851140f82df234db19b510ad584392f2 to your computer and use it in GitHub Desktop.
Save manuzhang/851140f82df234db19b510ad584392f2 to your computer and use it in GitHub Desktop.
package org.apache.spark
import java.io.{BufferedInputStream, PrintWriter}
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{LocalFileSystem, Path}
import org.apache.spark.io.CompressionCodec
import scala.io.Source
import scala.util.Try
/**
 * Command-line tool that decompresses an LZ4-compressed text file on the local file system.
 *
 * Usage: `Lz4Decompressor <input>`
 *
 * The input is opened through Hadoop's `LocalFileSystem`, decoded with the codec returned by
 * `CompressionCodec.createCodec(new SparkConf, "lz4")`, and copied line by line to the output
 * file. The output path is the input path with its `.lz4` suffix removed; when the input has no
 * such suffix, `.decompressed` is appended instead so that the input file is never overwritten.
 *
 * The copy is line-oriented: every line read is written back with `println`, so line terminators
 * are rewritten using the platform line separator. No charset is passed on either side, so the
 * defaults of `scala.io.Source` and `java.io.PrintWriter` apply.
 */
object Lz4Decompressor extends App {
  import scala.util.control.NonFatal

  // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException on args(0).
  require(args.nonEmpty, "Usage: Lz4Decompressor <path to .lz4 file>")

  val fs = new LocalFileSystem
  fs.initialize(new URI("file:///"), new Configuration)

  val input = args(0)

  val inputStream = {
    val in = new BufferedInputStream(fs.open(new Path(input)))
    try {
      val codec = CompressionCodec.createCodec(new SparkConf, "lz4")
      codec.compressedInputStream(in)
    } catch {
      // The raw stream is only released here when wrapping it failed; on success the
      // returned stream is closed after the copy below.
      case NonFatal(e) =>
        in.close()
        throw e
    }
  }

  // stripSuffix returns the string unchanged when the suffix is absent, which would make the
  // writer truncate the very file being read. Pick a distinct name in that case.
  val output =
    if (input.endsWith(".lz4")) input.stripSuffix(".lz4") else s"$input.decompressed"

  val writer =
    try {
      new PrintWriter(output)
    } catch {
      case NonFatal(e) =>
        inputStream.close()
        throw e
    }

  try {
    Source.fromInputStream(inputStream).getLines().foreach(writer.println)
    // PrintWriter never throws on write failures; checkError() flushes and reports them.
    if (writer.checkError()) {
      throw new java.io.IOException(s"Failed to write decompressed output to $output")
    }
  } finally {
    // Closing the writer flushes buffered output; without it the file may be left incomplete.
    try writer.close()
    finally inputStream.close()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment