Instantly share code, notes, and snippets.

Embed
What would you like to do?
Given a file produced by the chunklink.pl script, return a conll2000 format file to stdout.
"""
Given a file generated by chunklink, convert to conll2000 format
"""
from __future__ import print_function
import argparse
def main(file_path=None):
    """Convert a chunklink output file to CoNLL-2000 format on stdout.

    Lines starting with ``#`` are skipped. Empty or whitespace-only lines
    are echoed as an empty line. Every other line is split on whitespace
    and printed as ``word pos_tag chunk_tag``, taken from the fields at
    (0-based) positions 4, 3 and 2 respectively.

    Args:
        file_path: Path of the file to convert. When omitted (``None``),
            the path is read from the module-level ``args`` namespace that
            the command-line entry point creates.

    Raises:
        IndexError: If a non-empty, non-comment line has fewer than five
            whitespace-separated fields.
    """
    if file_path is None:
        # Backward-compatible fallback: the script entry point parses the
        # command line into a module-level ``args`` and calls main() bare.
        file_path = args.file_path

    with open(file_path) as f:
        for line in f:
            line = line.rstrip("\n")

            # Skip lines starting with #
            if line.startswith("#"):
                continue

            fields = line.split()

            # Print empty line if empty line is input
            if not fields:
                print()
                continue

            chunk_tag = fields[2]
            pos_tag = fields[3]
            word = fields[4]
            print("{} {} {}".format(word, pos_tag, chunk_tag))
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the path to
    # the chunklink output file, and writes the converted text to stdout.
    parser = argparse.ArgumentParser(
        description=("Given a file generated by chunklink, write a "
                     "conll2000 format file to stdout."),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("file_path", type=str,
                        help=("Path to the file generated by chunklink."))
    # main() picks up the parsed options from this module-level name.
    args = parser.parse_args()
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment