Skip to content

Instantly share code, notes, and snippets.

@frank-leap
Created June 15, 2015 13:27
Show Gist options
  • Save frank-leap/73089fbdad43af309e89 to your computer and use it in GitHub Desktop.
Save frank-leap/73089fbdad43af309e89 to your computer and use it in GitHub Desktop.
Reads a semicolon-separated CSV file that has a header row and uses the first column as the key, then writes a new file without the header in which all lines sharing the same key are merged into a single line.
import java.io.File
import java.io.PrintWriter
import scala.annotation.migration
import scala.collection.immutable.ListMap
import scala.collection.mutable.Map
/**
 * Merges the rows of a semicolon-separated file that share the same key.
 *
 * The first column of every row is its key. Rows with the same key are
 * collapsed into one row: the key followed by the remaining fields of each
 * such row, in the order the rows were read. The result is ordered by key.
 */
object JoinDuplicatedLines {

  /**
   * Reads `input.csv` from the working directory, skips its first (header)
   * line, merges rows with the same key and writes the result, without a
   * header, to `output.csv`.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val input = scala.io.Source.fromFile("input.csv")
    // mergeLines consumes the iterator completely, so the source can be
    // closed as soon as it returns.
    val rows =
      try mergeLines(input.getLines().drop(1))
      finally input.close()

    val output = new PrintWriter(new File("output.csv"))
    try rows.foreach(row => output.println(row))
    finally output.close()
  }

  /**
   * Merges data lines that share the same first field.
   *
   * Every field is trimmed. Blank lines are ignored. Empty fields, including
   * trailing ones, are kept so that columns are not silently dropped.
   *
   * @param lines data lines (header already removed), fields separated by `;`
   * @return one merged row per distinct key, sorted by key
   */
  def mergeLines(lines: Iterator[String]): Seq[String] = {
    val empty = scala.collection.immutable.TreeMap.empty[String, String]
    val merged = lines
      // A blank line carries no key; keeping it would emit a bogus empty row.
      .filter(_.trim.nonEmpty)
      .foldLeft(empty) { (acc, line) =>
        // Limit -1 keeps trailing empty fields. Without it a line such as
        // ";;" splits to an empty array and the key lookup would throw.
        line.split(";", -1).map(_.trim).toList match {
          case key :: rest =>
            val row = acc.get(key) match {
              case Some(existing) if rest.nonEmpty => s"$existing;${rest.mkString(";")}"
              // A repeated key without further fields adds nothing; do not
              // append a dangling separator.
              case Some(existing) => existing
              case None => (key :: rest).mkString(";")
            }
            acc.updated(key, row)
          // split with a negative limit always yields at least one element.
          case Nil => acc
        }
      }
    // TreeMap iterates in ascending key order.
    merged.values.toVector
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment