// NOTE: the hosting page flagged this file as containing bidirectional Unicode text that may be
// interpreted or compiled differently than it appears. To review, open the file in an editor
// that reveals hidden Unicode characters.
// Load the gzipped access log as an RDD of strings, one element per line of the file.
val accessLogs = sc.textFile("/data/spark/project/access/access.log.45.gz")
// Sanity check: look at the first 10 records; each should be a single raw log line.
accessLogs.take(10)
// Keep only the lines that start with an IP address
/** Tells whether a log line begins with an IP-like token.
  *
  * A line qualifies when it starts with one or more characters drawn from
  * digits and dots, followed by a single space and then any remaining text.
  * The pattern is deliberately loose: it does not validate octet ranges or
  * the number of dots, so a token such as "1234" or "..." is also accepted.
  *
  * @param line one raw line of the access log
  * @return `true` if the entire line matches the pattern, `false` otherwise
  */
def containsIP(line: String): Boolean = line.matches("^([0-9\\.]+) .*$")
// Drop every log line that containsIP rejects.
val ipaccesslogs = accessLogs.filter(containsIP)
// Extract only the IP address from each line
NewerOlder