Created
July 11, 2015 21:57
-
-
Save romatthe/99dab79cedbc79f7bf4f to your computer and use it in GitHub Desktop.
ScanLogs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package viciffy | |
import scala.collection.JavaConverters._ | |
import scala.concurrent.ExecutionContext.Implicits.global | |
import scala.concurrent.{Await, Future} | |
import scala.concurrent.duration._ | |
import java.io.{InputStream, File} | |
import java.util.Calendar | |
import java.util.zip.{ZipEntry, ZipFile} | |
import java.text.SimpleDateFormat | |
import com.github.tototoshi.csv._ | |
import scala.io.Source | |
/**
  * Scans every `.zip` archive in a fixed directory for log entries whose name contains
  * "Almex.CDF.Atos.log.txt", extracts the lines matching the fault pattern and dumps the
  * distinct matches to a timestamped, ';'-delimited CSV file in the working directory.
  */
object Viciffy extends App {

  /** Custom format with ';' delimiter, picked up implicitly by Scala-CSV. */
  implicit object MyFormat extends DefaultCSVFormat {
    override val delimiter = ';'
  }

  /** Regex that detects the fault lines. */
  val pattern = "50 45 ([0-9]{2} ){15}?60".r

  /** Name fragment identifying the log files of interest inside each archive. */
  private val LogFileMarker = "Almex.CDF.Atos.log.txt"

  /**
    * Asynchronously scans one zip archive.
    *
    * @param file the zip archive to inspect
    * @return a future holding one tuple per matching log line (see [[scanSource]]); the list is
    *         empty when the archive cannot be opened or read because of one of the handled
    *         exceptions below
    */
  def scanZip(file: File): Future[List[(String, String, String)]] = Future {
    try {
      // Opened inside the try so that a corrupt or missing archive is handled below as well.
      val zip = new ZipFile(file.getAbsolutePath)
      try {
        // `entries.asScala` is a one-shot iterator: traverse it exactly once and materialise
        // the result with `toList` before the archive is closed.
        zip.entries.asScala
          .filter(_.getName.contains(LogFileMarker))
          .flatMap { entry =>
            val source =
              Source.fromInputStream(zip.getInputStream(entry))(scala.io.Codec("ISO-8859-1"))
            try scanSource(source) finally source.close()
          }
          .toList
      } finally zip.close()
    } catch {
      // Best effort: an unreadable archive contributes no matches, but is reported on stderr
      // instead of being dropped silently.
      case e @ (_: java.io.FileNotFoundException |
                _: java.util.zip.ZipException |
                _: java.nio.charset.UnmappableCharacterException) =>
        Console.err.println("Skipping %s: %s".format(file.getAbsolutePath, e))
        List.empty
    }
  }

  /**
    * Extracts the fault lines from a single log source.
    *
    * @param source the log contents; it is read but not closed here
    * @return for every line matching [[pattern]] that has at least five ';'-separated fields, a
    *         tuple of a placeholder string, the first two fields joined by a space, and the
    *         fifth field
    */
  def scanSource(source: Source): List[(String, String, String)] =
    source.getLines()
      .filter(line => pattern.findFirstIn(line).isDefined)
      .map(_.split(";"))
      // Matching lines with too few fields are skipped rather than failing the whole scan
      // with an ArrayIndexOutOfBoundsException.
      .collect {
        case fields if fields.length > 4 =>
          ("TO BE REMOVED", "%s %s".format(fields(0), fields(1)), fields(4))
      }
      .toList

  // Start one scan per zip archive in the directory. `listFiles` returns null when the path
  // is not a readable directory, hence the Option wrapper.
  val futures = Option(new File("/Users/robin/Development/Src/Scala/viciffy").listFiles)
    .getOrElse(Array.empty[File])
    .filter(_.getName.toLowerCase(java.util.Locale.ROOT).endsWith(".zip"))
    .map(scanZip)

  // Wait for every scan and gather the results. Duplicate log files produce duplicate
  // matches, so only the distinct rows are kept.
  val matchesAll: List[List[String]] =
    Await.result(Future.sequence(futures.toList), Duration.Inf)
      .flatten
      .map { case (origin, timeStamp, payload) => List(origin, timeStamp, payload) }
      .distinct

  val today = Calendar.getInstance().getTime
  // "HH" is the 24-hour clock; "hh" would produce ambiguous names without an AM/PM marker.
  val format = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss")

  // Dump it to a CSV, closing the writer so the rows are flushed to disk.
  val file = new File("Result-%s.csv".format(format.format(today)))
  val writer = CSVWriter.open(file)
  try writer.writeAll(matchesAll) finally writer.close()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment