Skip to content

Instantly share code, notes, and snippets.

@LenKIM
Created September 17, 2018 02:58
Show Gist options
  • Save LenKIM/dea553f924e910fb9b29b5f4403bd4ca to your computer and use it in GitHub Desktop.
Save LenKIM/dea553f924e910fb9b29b5f4403bd4ca to your computer and use it in GitHub Desktop.
파싱테스트 관련 자료
#!/usr/bin/python3
# Normal
def test_custom_parser(self):
    """Time ``custom_log_parser`` over every line of the sample log files.

    Each ``*.txt`` file matched by the glob pattern is read fully into
    memory, every line is passed through ``custom_log_parser`` (with a
    ``tqdm`` progress bar per file), and the elapsed wall-clock seconds
    since the start of the run are printed.
    """
    # NOTE(review): absolute, machine-specific path; no files match elsewhere,
    # in which case the loop body never runs.
    log_files = glob.glob("/Users/len/log-analyer-assignment/logdata/20180824/*.txt", recursive=False)
    started_at = time.time()
    for log_file in log_files:
        with open(str(log_file), 'r', encoding='utf8') as handle:
            log_lines = handle.readlines()
        for log_line in tqdm(log_lines):
            # The parsed result is discarded; only the elapsed time matters.
            custom_log_parser(log_line)
    print(time.time() - started_at)
    # NOTE(review): compares two empty lists, so this check cannot fail and
    # does not verify anything about the parser output.
    self.assertEqual([], [])
def test_shlex_parser(self):
    """Time the ``shlex``-based parser over every line of the sample log files.

    Each ``*.txt`` file matched by the glob pattern is read fully into
    memory, every line is passed through
    ``get_the_request_api_and_last_one_and_datetime`` (with a ``tqdm``
    progress bar per file), and the elapsed wall-clock seconds since the
    start of the run are printed.
    """
    # NOTE(review): absolute, machine-specific path; no files match elsewhere,
    # in which case the loop body never runs.
    log_files = glob.glob("/Users/len/log-analyer-assignment/logdata/20180824/*.txt", recursive=False)
    started_at = time.time()
    for log_file in log_files:
        with open(str(log_file), 'r', encoding='utf8') as handle:
            log_lines = handle.readlines()
        for log_line in tqdm(log_lines):
            # The parsed result is discarded; only the elapsed time matters.
            get_the_request_api_and_last_one_and_datetime(log_line)
    print(time.time() - started_at)
    # NOTE(review): compares two empty lists, so this check cannot fail and
    # does not verify anything about the parser output.
    self.assertEqual([], [])
def get_the_request_api_and_last_one_and_datetime(string) -> List:
    """Extract three fields from one log line using shell-style tokenising.

    The line is tokenised with ``shlex.split`` (whitespace-separated, with
    quoted substrings kept together), and three tokens are picked by fixed
    position.

    Args:
        string: A single log line.

    Returns:
        ``[request_api, datetime, response_time]`` where ``request_api`` is
        token 7, ``datetime`` is token 4 with its first character removed,
        and ``response_time`` is token 14.

    Raises:
        IndexError: If the line yields fewer than 15 tokens.
        ValueError: If ``shlex.split`` rejects the line (for example, an
            unbalanced quote).
    """
    tokens = shlex.split(string)
    # Token 4 carries one leading character that is not part of the value
    # (presumably an opening '[' -- confirm against the log format).
    return [tokens[7], tokens[4][1:], tokens[14]]
def custom_log_parser(string) -> List:
    """Split one log line into fields, keeping quoted sections together.

    The line is split on single spaces. A token beginning with ``"`` opens a
    section that runs until a token ending in an unescaped ``"``; a token
    beginning with ``[`` opens one that runs until a token ending in an
    unescaped ``]``. The tokens of a section are re-joined with single
    spaces, the surrounding delimiters are dropped, and backslash-escaped
    closing delimiters inside the section are unescaped.

    Args:
        string: A single log line. CR and LF characters are removed before
            splitting.

    Returns:
        The fields in order of appearance. Consecutive spaces outside a
        section produce empty-string fields. A section that is never closed
        is discarded.
    """
    row = []
    quote_part = []  # tokens of the section currently being collected
    quote_end = ''   # closing delimiter of that section; '' when none is open
    # for string in string.replace('\r', '').replace('\n', '').split(' '):
    for token in re.sub('[\r\n]', '', string).split(' '):
        if quote_part:
            quote_part.append(token)
        elif not token:
            row.append('')
        elif token[0] == '"':
            quote_part = [token]
            quote_end = '"'
        elif token[0] == '[':
            quote_part = [token]
            quote_end = ']'
        else:
            row.append(token)

        # Only a token ending in the active closing delimiter can end a
        # section; with no section open quote_end is '' and never matches.
        if not token or token[-1] != quote_end:
            continue
        # A lone '"' that has just opened the section is not also its closer.
        if len(quote_part) == 1 and len(token) == 1:
            continue
        # A backslash-escaped delimiter is content, not the end of the section.
        if len(token) > 1 and token[-2] == '\\':
            continue
        section = ' '.join(quote_part)[1:-1]
        row.append(section.replace('\\' + quote_end, quote_end))
        quote_part = []
        quote_end = ''
    return row
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment