Skip to content

Instantly share code, notes, and snippets.

@yhuag
Created March 28, 2020 15:05
Show Gist options
  • Save yhuag/d8cb7d151f817ff1def6a2a53697a803 to your computer and use it in GitHub Desktop.
Save yhuag/d8cb7d151f817ff1def6a2a53697a803 to your computer and use it in GitHub Desktop.
import pandas as pd
import textwrap
import csv
# Configs
MAX_CHAR_LEN = 133
INPUT_FILE_NAME = "/Users/yao-chiehhu/Downloads/AICUP_DATA.csv"
OUTPUT_FILE_NAME = "/Users/yao-chiehhu/Downloads/AICUP_OUTPUT.csv"


def wrap_texts(texts, width=MAX_CHAR_LEN):
    """Wrap each text into a list of lines at most *width* characters wide.

    Args:
        texts: Iterable of cell values (normally strings). Missing values
            (``None`` / NaN, which is how pandas represents an empty CSV
            cell) are treated as empty text instead of raising.
        width: Maximum length of each wrapped line.

    Returns:
        A list with one entry per input text; each entry is the list of
        wrapped lines for that text (an empty list for empty/missing text).
    """
    wrapper = textwrap.TextWrapper(width=width)  # break_on_hyphens=False can be used
    wrapped = []
    for text in texts:
        if pd.isna(text):
            # TextWrapper.wrap() only accepts strings; a NaN float would
            # raise AttributeError, so mirror the result of wrapping "".
            wrapped.append([])
        else:
            wrapped.append(wrapper.wrap(text=str(text)))
    return wrapped


def write_rows(rows, stream):
    """Write *rows* to *stream* as CSV with every field quoted.

    Each row is a list of wrapped lines and each wrapped line becomes one
    CSV field. The list itself is handed to the writer: passing a joined
    string would make ``csv.writer`` iterate it character by character and
    emit one field per character.

    Args:
        rows: Iterable of lists of strings.
        stream: Writable text file object (opened with ``newline=''``).
    """
    writer = csv.writer(stream, quoting=csv.QUOTE_ALL)
    writer.writerows(rows)


def main():
    """Read the input CSV, wrap its ``text`` column, and write the result."""
    data = pd.read_csv(INPUT_FILE_NAME)

    # Wrap all texts
    word_list_all = wrap_texts(data["text"])
    word_list_all_len = [len(word_list) for word_list in word_list_all]

    # Sanity check: max()/min() raise ValueError on an empty sequence,
    # so only report when the input had at least one row.
    if word_list_all_len:
        print(max(word_list_all_len))
        print(min(word_list_all_len))

    # Write to CSV. pandas reads the input as UTF-8 by default, so write
    # UTF-8 explicitly rather than relying on the platform's locale encoding.
    with open(OUTPUT_FILE_NAME, "w", newline="", encoding="utf-8") as myfile:
        write_rows(word_list_all, myfile)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment