Skip to content

Instantly share code, notes, and snippets.

@likejazz
Created September 23, 2019 04:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save likejazz/10a3836d3407dfa2c1437f73db045f31 to your computer and use it in GitHub Desktop.
Save likejazz/10a3836d3407dfa2c1437f73db045f31 to your computer and use it in GitHub Desktop.
# %%
import json
# Merge SQuAD-style JSON files into one file by concatenating the list found
# under each file's top-level "data" key.
datas = []

# Process KorQuAD's training data.
# Counts: 60,407
# The loop runs exactly once for the single training file; it is kept so the
# commented-out numbered-file line below (which uses `i`) can be re-enabled
# by widening the range.
for i in range(0, 1):
    in_file = '/Users/likejazz/Desktop/korquad'
    # in_file += '/korquad2.0_train_0' + str(i) + '.json'
    in_file += '/KorQuAD_v1.0_train.json'
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, which is not UTF-8 everywhere.
    with open(in_file, encoding='utf-8') as f:
        data = json.load(f)
    datas.extend(data["data"])

# Process AIHUB's data.
# Counts: 100,268
# in_file = '/Users/likejazz/Desktop/korquad'
# in_file += '/ko_wiki_v1_squad_lint.json'
# with open(in_file, encoding='utf-8') as f:
#     data = json.load(f)
# datas.extend(data["data"])

# Process KorQuAD's dev data.
# Counts: 5,774
# in_file = '/Users/likejazz/Desktop/korquad'
# in_file += '/KorQuAD_v1.0_dev.json'
# with open(in_file, encoding='utf-8') as f:
#     data = json.load(f)
# datas.extend(data["data"])

# Combine together.
# NOTE: despite its name, json_str is a dict, not a serialized string; the
# name is kept for anyone relying on it in an interactive session.
json_str = {"data": datas}
out_file = '/Users/likejazz/Desktop/korquad'
out_file += '/output.json'
# ensure_ascii=False writes non-ASCII characters unescaped, so the output
# file must be opened with an encoding that can represent them; UTF-8 is
# stated explicitly rather than inherited from the locale.
with open(out_file, 'w', encoding='utf-8') as outfile:
    json.dump(json_str, outfile, indent=4, ensure_ascii=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment