Skip to content

Instantly share code, notes, and snippets.

@standbyme
Created November 7, 2018 11:54
Show Gist options
  • Save standbyme/ad18cb79e1394ff9a169b2ed644c8ba6 to your computer and use it in GitHub Desktop.
Save standbyme/ad18cb79e1394ff9a169b2ed644c8ba6 to your computer and use it in GitHub Desktop.
File clean
import re
def solve_specific(line: str) -> str:
    """Convert one ``[name][label]`` annotation into per-character tag lines.

    The first character of ``name`` is emitted as ``<char> B-<label>`` and
    every following character as ``<char> I-<label>``. The lines are joined
    with a newline character (no trailing newline). An empty ``name`` yields
    an empty string.

    The pattern is only anchored at the start of ``line``: any text after the
    closing bracket of the label is ignored. The ``name`` group is greedy, so
    it extends up to the last ``][`` pair that still allows a match.

    Args:
        line: Text beginning with a ``[name][label]`` annotation.

    Returns:
        The tagged characters of ``name``, one ``<char> <tag>`` pair per line.

    Raises:
        ValueError: If ``line`` does not start with a ``[name][label]``
            annotation.
    """
    matched = re.match(r'\[(.*)\]\[(.*?)\]', line)
    if matched is None:
        # Fail with a message that names the offending input instead of an
        # AttributeError on None.
        raise ValueError(
            'expected a "[name][label]" annotation, got {!r}'.format(line))
    name, label = matched.groups()
    return '\n'.join(
        '{} {}-{}'.format(char, 'B' if index == 0 else 'I', label)
        for index, char in enumerate(name)
    )
def solve(line: str):
    """Tag one segment of text, character by character.

    A segment that starts with a ``[name][label]`` annotation is delegated to
    ``solve_specific``. Any other non-empty segment is treated as plain text,
    and each of its characters is emitted as ``<char> 0``, joined with
    newline characters.

    A segment that merely starts with ``[`` but is not a complete annotation
    (for example ``[c]`` or a lone ``[``) is tagged as plain text rather than
    being handed to ``solve_specific``, which cannot parse it.

    Args:
        line: A segment of text; may be empty.

    Returns:
        The tagged characters, one ``<char> <tag>`` pair per line, or ``''``
        for an empty segment.
    """
    if line == '':
        return ''
    # Same pattern solve_specific parses with; only delegate when it will
    # actually match.
    if re.match(r'\[(.*)\]\[(.*?)\]', line) is not None:
        return solve_specific(line)
    # NOTE(review): the outside tag here is the digit 0, whereas BIO schemes
    # usually use the letter O — confirm what the downstream consumer expects.
    return '\n'.join('{} 0'.format(char) for char in line)
def clean(line: str) -> str:
    """Convert an annotated line into newline-joined ``<char> <tag>`` pairs.

    The line is split into ``[name][label]`` annotations and the plain text
    between them (the capturing group keeps the annotations in the split
    result). Each non-empty segment is tagged with ``solve`` and the tagged
    pieces are joined with newline characters.

    Pieces that tag to an empty string (an annotation with an empty name,
    such as ``[][X]``) are dropped. Otherwise they would leave an empty line
    in the middle of the result.

    Args:
        line: One line of annotated text, without its line terminator.

    Returns:
        The tagged characters of the whole line; ``''`` for an empty line.
    """
    segments = re.split(r'(\[.*?\]\[.*?\])', line, flags=re.S)
    # re.split yields '' between adjacent annotations and at the edges.
    tagged = (solve(segment) for segment in segments if segment)
    return '\n'.join(piece for piece in tagged if piece)
# Convert every line of ../data/temp with clean() and write the result to
# ../data/train.txt, following each converted line with a blank line.
# The input is opened first so that a missing input file does not truncate an
# existing output file.
# NOTE(review): both files are opened with the platform default encoding —
# confirm that matches the data.
with open("../data/temp", "r") as in_file, \
        open('../data/train.txt', 'w') as out_file:
    for line in in_file:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would eat real data on a final line that has no
        # trailing newline.
        out_file.write(clean(line.rstrip('\n')))
        out_file.write('\n\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment