Skip to content

Instantly share code, notes, and snippets.

@sokrato
Last active January 3, 2016 00:39
Show Gist options
  • Save sokrato/8384192 to your computer and use it in GitHub Desktop.
Save sokrato/8384192 to your computer and use it in GitHub Desktop.
解析一个912M(8e6行)的日志文本文件,同样的逻辑Python用了4'05'', C程序用了14''. 性能还是有差距的啊!
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#define MAXLINE 1024
#define DATE_FORMAT "%Y-%m-%d %H:%M:%S"
#define GAP_IN_SEC 2
/*
 * Scan a log on stdin and report time gaps between consecutive entries.
 *
 * Each input line is expected to carry a timestamp in DATE_FORMAT starting
 * at byte offset 5.  A line is echoed to stdout, prefixed with its 1-based
 * line number, when its timestamp is more than GAP_IN_SEC seconds later
 * than the previously parsed timestamp.  Lines whose timestamp cannot be
 * parsed are counted but otherwise ignored.
 *
 * Returns 0 on success; exits with status 1 if reading stdin fails.
 *
 * NOTE: on glibc, strptime() is only declared when _XOPEN_SOURCE (or
 * _GNU_SOURCE) is defined before the first #include.
 */
int main(int argc, char* argv[])
{
    struct tm tm;
    time_t prev_ts = 0, cur_ts = 0;
    char line[MAXLINE];
    unsigned long lineno = 0;
    int at_line_start = 1;  /* the next fgets() chunk begins a new input line */
    int echo_rest = 0;      /* the overlong line being read was reported */

    (void)argc;
    (void)argv;

    while (NULL != fgets(line, MAXLINE, stdin)) {
        size_t len = strlen(line);
        int is_first_chunk = at_line_start;

        /* A chunk without a trailing newline means the line continues. */
        at_line_start = (len > 0 && line[len - 1] == '\n');

        if (!is_first_chunk) {
            /*
             * Tail of a line longer than MAXLINE-1 bytes: it must not be
             * counted as a new line; finish printing it if it was reported.
             */
            if (echo_rest) {
                fputs(line, stdout);
            }
            continue;
        }
        echo_rest = 0;
        ++lineno;

        /*
         * Too short to hold a timestamp at offset 5.  Reading line+5 here
         * would look past the terminator at bytes left by an earlier line.
         */
        if (len <= 5) {
            continue;
        }

        /*
         * strptime() only stores the fields named in the format, so start
         * from a clean struct and let mktime() work out DST by itself.
         */
        memset(&tm, 0, sizeof tm);
        tm.tm_isdst = -1;
        if (NULL == strptime(line + 5, DATE_FORMAT, &tm)) {
            continue;
        }

        cur_ts = mktime(&tm);
        if (cur_ts == (time_t)-1) {
            continue;   /* calendar time not representable */
        }

        /* prev_ts == 0 means no timestamp has been seen yet. */
        if (prev_ts > 0 && cur_ts > prev_ts + GAP_IN_SEC) {
            printf("%6lu %s", lineno, line);
            echo_rest = !at_line_start;
        }
        prev_ts = cur_ts;
    }

    if (ferror(stdin)) {
        perror("fgets error");
        exit(1);
    }
    printf("\n");
    return 0;
}
#-*- coding:utf8 -*-
from __future__ import print_function
import sys
import time
def filter(file, gap=2):
    """Print the lines of *file* that follow a time gap in the log.

    Each line is expected to hold a ``%Y-%m-%d %H:%M:%S`` timestamp in
    columns 5-23.  A line is printed, prefixed with its 1-based line
    number, when its timestamp is more than *gap* seconds later than the
    previously parsed timestamp.

    Lines whose timestamp cannot be parsed are counted but otherwise
    skipped, instead of aborting the whole run with ``ValueError``.

    file -- iterable of text lines (e.g. ``sys.stdin``)
    gap  -- threshold in seconds; defaults to 2
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    prev_ts = 0  # 0 means "no timestamp seen yet"
    for lineno, line in enumerate(file, 1):
        try:
            ts = time.mktime(time.strptime(line[5:24], fmt))
        except (ValueError, OverflowError):
            # Blank, truncated or non-log line: ignore it but keep counting.
            continue
        if prev_ts > 0 and ts > prev_ts + gap:
            print('%6d %s' % (lineno, line), end='')
        prev_ts = ts
# Script entry point: run the gap filter over standard input.
if __name__ == "__main__":
    filter(sys.stdin)
@sokrato
Copy link
Author

sokrato commented Jan 12, 2014

time 结果对比
Python:

real 3m42.473s
user 3m30.885s
sys 0m11.081s

C:

real 0m16.278s
user 0m10.465s
sys 0m5.744s

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment