Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
First attempt at parsing IIS (W3C extended format) log files with Scala
// http://stackoverflow.com/questions/1284423/read-entire-file-in-scala
//val source = scala.io.Source.fromFile("G:\\test\\2015-12 Logging Eventhub\\W3SVC916189047\\u_ex151130.log")
// Open the log file; the lines are read lazily while the entries are printed below.
val source = scala.io.Source.fromFile("testlog.log")
val lines = source.getLines()
try {
  //getLogItems(lines).take(5).foreach { line => println(line) }
  // Print every parsed log entry (one column-name -> value map per data line).
  getLogItems(lines).foreach { line => println(line) }
} finally {
  // The Source keeps the underlying file open; release it even if parsing fails.
  source.close()
}
/**
 * Iterator that turns the raw lines of a log file into one map per data line,
 * keyed by the column names announced in the most recent `#Fields: ` directive.
 *
 * Line handling:
 *  - empty lines are skipped (they carry no entry);
 *  - lines starting with `#` are directives: they are echoed to stdout and,
 *    when they start with `#Fields: `, replace the current column names;
 *  - every other line is split on single spaces and paired with the current
 *    column names position by position.
 *
 * When a data line has more or fewer values than there are column names, only
 * the positions present in both are stored. A data line that appears before any
 * `#Fields: ` directive therefore produces an empty map instead of failing.
 *
 * @param lines the raw lines of the log file, consumed lazily
 */
class LogItemsIterator(lines : Iterator[String]) extends Iterator[collection.mutable.Map[String, String]] {
  // Look-ahead entry: the record that the next call to `next` will return.
  var _nextLine : Option[collection.mutable.Map[String, String]] = None
  // Column names from the most recent "#Fields: " directive (empty until one is seen).
  var _columns : Array[String] = Array.empty[String]
  // Cached answer for hasNext(); None until the first look-ahead has been performed.
  var _hasNext : Option[Boolean] = None

  /** @return true when another parsed entry is available; performs the first look-ahead on demand. */
  def hasNext() : Boolean = _hasNext.getOrElse(nextLine().isDefined)

  /**
   * Advances the underlying line iterator to the next data line, updating the
   * column names from any `#Fields: ` directive met on the way, and stores the
   * parsed entry as the new look-ahead.
   *
   * @return the parsed entry, or None when the input is exhausted
   */
  def nextLine() : Option[collection.mutable.Map[String, String]] = {
    var found : Option[String] = None
    while (found.isEmpty && lines.hasNext) {
      val line = lines.next()
      if (line.isEmpty) {
        // Blank line: not an entry, keep scanning.
      } else if (line.charAt(0) == '#') {
        println("#: " + line)
        if (line.startsWith("#Fields: ")) {
          _columns = line.split(" ").drop(1) // remove "#Fields:"
        }
      } else {
        found = Some(line)
      }
    }
    _nextLine = found.map { entry =>
      val record = new collection.mutable.HashMap[String, String]
      // zip stops at the shorter side, so a column/value count mismatch cannot index out of range.
      record ++= _columns.zip(entry.split(" "))
      record
    }
    _hasNext = Some(_nextLine.isDefined)
    _nextLine
  }

  /**
   * Returns the current look-ahead entry and advances to the following one.
   *
   * @throws NoSuchElementException when no entry is left
   */
  def next : collection.mutable.Map[String, String] = {
    if (!hasNext()) {
      throw new NoSuchElementException("next on empty iterator")
    }
    val current = _nextLine
    nextLine()
    current.getOrElse(throw new NoSuchElementException("next on empty iterator"))
  }
}
/**
 * Wraps the raw log lines in a LogItemsIterator.
 *
 * @param lines the raw lines of the log file
 * @return an iterator over the parsed entries, one map per data line
 */
def getLogItems(lines : Iterator[String]) : Iterator[collection.mutable.Map[String, String]] =
  new LogItemsIterator(lines)
#Software: Microsoft Internet Information Services 7.5
#Version: 1.0
#Date: 2015-11-30 00:00:00
#Fields: date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken
2015-11-30 00:00:00 145.85.27.38 EERSTE / - 80 - 145.85.27.38 - 200 0 0 2
2015-11-30 00:00:02 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 2
2015-11-30 00:00:04 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 3
2015-11-30 00:00:06 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 14
2015-11-30 00:00:08 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 2
2015-11-30 00:18:56 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 1
2015-11-30 00:18:58 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 200 0 0 2
2015-11-30 00:19:03 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 503 0 64 3778
2015-11-30 00:19:03 145.85.27.38 SPPING / - 80 - 145.85.27.38 - 503 0 64 3380
#Software: Microsoft Internet Information Services 7.5
#Version: 1.0
#Date: 2015-11-30 00:55:26
#Fields: date2 time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken
2015-11-30 00:55:26 145.85.27.38 GET / - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 308
2015-11-30 00:55:26 145.85.27.38 GET / - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 456
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 340
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 340
2015-11-30 00:55:26 145.85.27.38 POST /_vti_bin/publishingservice.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 340
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 289
2015-11-30 00:55:26 145.85.27.38 POST /_vti_bin/publishingservice.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 365
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 471
2015-11-30 00:55:26 127.0.0.1 GET /_vti_bin/sitedata.asmx - 80 - 127.0.0.1 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 458
2015-11-30 00:55:26 145.85.27.38 GET / - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 1 2148074254 282
2015-11-30 00:55:26 145.85.27.38 GET /_vti_bin/sitedata.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 424
2015-11-30 00:55:26 127.0.0.1 GET / - 80 - 127.0.0.1 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 471
2015-11-30 00:55:26 145.85.27.38 LAATSTE /_vti_bin/publishingservice.asmx - 80 - 145.85.27.38 Mozilla/4.0+(compatible;+MSIE+4.01;+Windows+NT;+MS+Search+6.0+Robot) 401 0 0 471
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.