Created
February 1, 2017 03:07
-
-
Save avvi00/f5ab036e8324fcbfe185bec2db1ccc4d to your computer and use it in GitHub Desktop.
Reads S3 access log files whose paths are passed in (or piped) and outputs PowerShell objects that can then be grouped or sorted.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pattern for a single S3 access log line. Each field is captured in a named
# group; the two quoted fields (RequestUri, UserAgent) keep their surrounding
# double quotes in the captured value. The pattern is not anchored at the end,
# so any extra text after VersionId is ignored.
$pat = '(?<BucketOwner>\S+) (?<BucketName>\S+) \[(?<Time>.*?)] (?<RemoteIp>\S+) (?<Requester>\S+) (?<RequestId>\S+) (?<Operation>\S+) (?<ObjectKey>\S+) (?<RequestUri>"([^"]+)") (?<HttpStatus>\S+) (?<ErrorCode>\S+) (?<BytesSent>\S+) (?<ObjectSize>\S+) (?<TotalTime>\S+) (?<TurnaroundTime>\S+) (?<Referrer>\S+) (?<UserAgent>"([^"]+)") (?<VersionId>\S+)'
$regEx = New-Object System.Text.RegularExpressions.Regex($pat)

# Lookup table: group name -> group number. GetGroupNames() and
# GetGroupNumbers() return parallel arrays, and both include the implicit
# whole-match group and the unnamed inner groups, whose names are purely
# numeric ("0", "1", ...). Consumers filter those out by name.
$dict = @{}
$names = $regEx.GetGroupNames()
$indexes = $regEx.GetGroupNumbers()
for ($i = 0; $i -lt $names.Length; $i++) {
    $dict.Add($names[$i], $indexes[$i])
}
<#
.SYNOPSIS
Parses S3 access log files into objects, one object per log line.

.DESCRIPTION
Reads each file line by line and matches every line against the script-level
$regEx. For each matching line a psobject is written to the pipeline with one
property per named capture group. The 'Time' property is converted to
[DateTime]; every other property is the captured string.

Lines that do not match the pattern are skipped: blank lines silently, any
other line with a warning.

.PARAMETER Files
Paths of the log files to read. Accepts pipeline input.
#>
Function Read-S3Log([Parameter(ValueFromPipeline=$true)][String[]] $Files) {
    Process {
        # GetGroupNames() also returns the numeric names of the whole-match and
        # unnamed groups; keep only the real field names (those with a non-digit).
        $fieldNames = @($regEx.GetGroupNames() | Where-Object { $_ -match '\D' })

        # The log timestamp uses English month abbreviations, so it must be
        # parsed culture-independently rather than with the current culture.
        $invariant = [System.Globalization.CultureInfo]::InvariantCulture

        foreach ($f in $Files) {
            Get-Content $f | ForEach-Object {
                $line = $_
                $res = $regEx.Match($line)
                if (-not $res.Success) {
                    if ($line -match '\S') {
                        Write-Warning "Read-S3Log: skipping unparseable line in $($f): $line"
                    }
                    # 'return' inside ForEach-Object moves on to the next line.
                    return
                }

                $propsDict = @{}
                foreach ($name in $fieldNames) {
                    $value = $res.Groups[$name].Value
                    if ($name -eq 'Time') {
                        $value = [DateTime]::ParseExact($value, 'dd/MMM/yyyy:HH:mm:ss zzz', $invariant)
                    }
                    $propsDict.Add($name, $value)
                }
                New-Object psobject -Property $propsDict
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment