Skip to content

Instantly share code, notes, and snippets.

@romatthe
Created July 11, 2015 21:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save romatthe/99dab79cedbc79f7bf4f to your computer and use it in GitHub Desktop.
Save romatthe/99dab79cedbc79f7bf4f to your computer and use it in GitHub Desktop.
ScanLogs
package viciffy
import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{Await, Future}
import scala.concurrent.duration._
import java.io.{InputStream, File}
import java.util.Calendar
import java.util.zip.{ZipEntry, ZipFile}
import java.text.SimpleDateFormat
import com.github.tototoshi.csv._
import scala.io.Source
/**
 * Scans every `.zip` archive in a directory for Atos log files, extracts the log
 * lines that match [[pattern]] and writes the unique matches to a timestamped,
 * semicolon-delimited CSV file in the current working directory.
 *
 * The directory to scan may be passed as the first command-line argument; when no
 * argument is given the original hard-coded development path is used.
 */
object Viciffy extends App {

  // Custom format with ';' delimiter used by Scala-CSV
  implicit object MyFormat extends DefaultCSVFormat {
    override val delimiter = ';'
  }

  // Define the Regex pattern to detect the fault-lines.
  // Declared before any Future is started so worker threads always see it initialised.
  val pattern = "50 45 ([0-9]{2} ){15}?60".r

  // Only archive entries whose name contains this marker are scanned.
  private val LogFileMarker = "Almex.CDF.Atos.log.txt"

  // Directory scanned when no command-line argument is supplied.
  private val DefaultLogDirectory = "/Users/robin/Development/Src/Scala/viciffy"

  /**
   * Asynchronously scans one zip archive and returns every match found in the
   * entries whose name contains `Almex.CDF.Atos.log.txt`.
   *
   * An archive that cannot be opened or decoded is reported on stderr and
   * contributes an empty list instead of failing the whole run.
   *
   * @param file the zip archive to scan
   * @return a future holding one `(label, timestamp, field)` tuple per matching line
   */
  def scanZip(file: File): Future[List[(String, String, String)]] = Future {
    try {
      // Opened inside the try so that a corrupt or missing archive is handled below.
      val zip = new ZipFile(file.getAbsolutePath)
      try {
        // `entries` is a one-shot iterator, so it must be traversed exactly once.
        zip.entries.asScala
          .filter(_.getName.contains(LogFileMarker))
          .flatMap { entry =>
            val source =
              Source.fromInputStream(zip.getInputStream(entry))(scala.io.Codec("ISO-8859-1"))
            try scanSource(source) finally source.close()
          }
          .toList // materialise before the archive is closed
      } finally zip.close()
    } catch {
      case e @ (_: java.io.FileNotFoundException |
                _: java.util.zip.ZipException |
                _: java.nio.charset.UnmappableCharacterException) =>
        Console.err.println("Skipping %s: %s".format(file.getName, e))
        List.empty
    }
  }

  /**
   * Extracts the matches from a single log source.
   *
   * Each line matching [[pattern]] is split on ';'. Lines with fewer than five
   * fields are skipped, because fields 0, 1 and 4 are all required.
   *
   * @param source the log text to scan; it is consumed but not closed here
   * @return one tuple per matching line: a fixed placeholder label, fields 0 and 1
   *         joined by a space (used as the timestamp), and field 4
   */
  def scanSource(source: Source): List[(String, String, String)] =
    source.getLines()
      .filter(pattern.findFirstIn(_).isDefined)
      .map(_.split(";"))
      .collect {
        case fields if fields.length > 4 =>
          ("TO BE REMOVED", "%s %s".format(fields(0), fields(1)), fields(4))
      }
      .toList

  // Holds all the (de-duplicated) matches that end up in the CSV.
  var matchesAll: List[List[String]] = List.empty

  private val logDirectory = new File(args.headOption.getOrElse(DefaultLogDirectory))

  // Start one scan per zip archive in the log directory.
  // `listFiles` returns null when the directory is missing or unreadable.
  val futures = for {
    file <- Option(logDirectory.listFiles)
      .getOrElse(Array.empty[File])
      .filter(_.getName.toLowerCase.endsWith(".zip"))
  } yield scanZip(file)

  // Wait for every scan and keep its result. Duplicate log files can produce
  // duplicate matches, so only the unique entries are kept.
  matchesAll = Await.result(Future.sequence(futures.toList), Duration.Inf)
    .flatten
    .distinct
    .map { case (label, timeStamp, field) => List(label, timeStamp, field) }

  val today = Calendar.getInstance().getTime
  // HH = 24-hour clock, so morning and afternoon runs get distinct file names.
  val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss")

  // Dump it to a CSV!
  val file = new File("Result-%s.csv".format(format.format(today)))
  val writer = CSVWriter.open(file)
  try writer.writeAll(matchesAll) finally writer.close()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment