Created
September 17, 2018 02:58
-
-
Save LenKIM/dea553f924e910fb9b29b5f4403bd4ca to your computer and use it in GitHub Desktop.
파싱테스트 관련 자료
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# Normal
def test_custom_parser(self):
    """Time ``custom_log_parser`` over every ``*.txt`` file in the sample log directory.

    Each file is read fully into memory, every line is passed to
    ``custom_log_parser``, and the total elapsed seconds are printed.
    The parsed rows themselves are discarded; only the timing is reported.
    """
    file_paths = glob.glob("/Users/len/log-analyer-assignment/logdata/20180824/*.txt", recursive=False)
    # perf_counter is monotonic, so the measured interval is not affected
    # by system clock adjustments.
    start_time = time.perf_counter()
    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf8') as infile:
            lines = infile.readlines()
        # tqdm wraps the list of lines to display progress while parsing.
        for line in tqdm(lines):
            custom_log_parser(line)
    print(time.perf_counter() - start_time)
    # NOTE(review): placeholder assertion -- it compares two empty lists and
    # can never fail, so this method only serves as a benchmark.
    self.assertEqual([], [])
def test_shlex_parser(self):
    """Time the ``shlex``-based parser over every ``*.txt`` file in the sample log directory.

    Each file is read fully into memory, every line is passed to
    ``get_the_request_api_and_last_one_and_datetime``, and the total elapsed
    seconds are printed. The parsed results are discarded; only the timing
    is reported.
    """
    file_paths = glob.glob("/Users/len/log-analyer-assignment/logdata/20180824/*.txt", recursive=False)
    # perf_counter is monotonic, so the measured interval is not affected
    # by system clock adjustments.
    start_time = time.perf_counter()
    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf8') as infile:
            lines = infile.readlines()
        # tqdm wraps the list of lines to display progress while parsing.
        for line in tqdm(lines):
            get_the_request_api_and_last_one_and_datetime(line)
    print(time.perf_counter() - start_time)
    # NOTE(review): placeholder assertion -- it compares two empty lists and
    # can never fail, so this method only serves as a benchmark.
    self.assertEqual([], [])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_the_request_api_and_last_one_and_datetime(string) -> List:
    """Extract three positional fields from one log line using ``shlex.split``.

    The line is tokenized with shell-like quoting rules, then:

    * token 7 is returned unchanged as the request API,
    * token 4 is returned with its first character removed
      (presumably a leading ``[`` of a bracketed timestamp -- confirm
      against the log format),
    * token 14 is returned unchanged as the response time.

    Args:
        string: A single raw log line.

    Returns:
        ``[request_api, datetime, response_time]`` as strings.

    Raises:
        IndexError: If the line has fewer than 15 tokens.
        ValueError: If ``shlex`` cannot tokenize the line (e.g. an
            unbalanced quote).
    """
    # Local names avoid shadowing the builtin ``list`` and the ``datetime`` module.
    tokens = shlex.split(string)
    request_api = tokens[7]
    timestamp = tokens[4][1:]
    response_time = tokens[14]
    return [request_api, timestamp, response_time]
def custom_log_parser(string) -> List:
    """Split one log line into fields, keeping ``"..."`` and ``[...]`` groups together.

    CR and LF characters are removed, then the line is split on single
    spaces. A token starting with ``"`` or ``[`` opens a group that runs
    until a token ending with the matching ``"`` or ``]``; the group's
    tokens are re-joined with spaces and the surrounding delimiters are
    stripped. A closing delimiter preceded by a backslash is treated as
    escaped: it does not close the group, and the backslash is removed from
    the resulting field.

    Consecutive spaces yield empty-string fields. A group that is never
    closed before the end of the line is not emitted.

    Args:
        string: A single raw log line.

    Returns:
        The list of field strings in order of appearance.
    """
    row = []
    quote_part = None  # tokens collected for the group currently open, if any
    quote_end = None   # delimiter that closes the open group
    for token in re.sub('[\r\n]', '', string).split(' '):
        if quote_part:
            quote_part.append(token)
        elif token == '':
            row.append('')
        elif token[0] == '"':
            quote_part = [token]
            quote_end = '"'
        elif token[0] == '[':
            quote_part = [token]
            quote_end = ']'
        else:
            row.append(token)

        if token and token[-1] == quote_end:
            # A delimiter directly preceded by a backslash is escaped and
            # must not terminate the group.
            if len(token) == 1 or token[-2] != '\\':
                field = ' '.join(quote_part)[1:-1]
                row.append(field.replace('\\' + quote_end, quote_end))
                quote_end = quote_part = None
    return row
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment