Created
July 12, 2021 22:50
-
-
Save danyaljj/160ed0844d74b6ead428d30bd7975388 to your computer and use it in GitHub Desktop.
convert.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from os import listdir
from os.path import isfile, join
# Directory holding the original per-task JSON files that this script converts.
# The trailing slash is kept so plain string concatenation with a file name
# still yields a valid path.
tasks_path = '/Users/danielk/ideaProjects/instructions-demo/app/app/static/tasks/'
# Maps a task name (the output file name without its ".json" suffix) to the
# single category label written into the converted task's 'Categories' list.
#
# NOTE(review): task022 maps to an empty string, so its converted file gets
# the category ''. Looks like a missing label -- confirm the intended value.
# NOTE(review): task046/task047 are keyed with the spelling "miscellaenous"
# (matching the '_misc_' -> '_miscellaenous_' rename done by the conversion
# loop) while task045 is keyed "miscellaneous". Verify the keys against the
# actual file names before changing either spelling.
categories = {
    'task001_quoref_question_generation': 'Generation',
    'task002_quoref_answer_generation': 'Generation',
    'task003_mctaco_question_generation_event_duration': 'Question Generation',
    'task004_mctaco_answer_generation_event_duration': 'Answer Generation',
    'task005_mctaco_wrong_answer_generation_event_duration': 'Incorrect Answer Generation',
    'task006_mctaco_question_generation_transient_stationary': 'Question Generation',
    'task007_mctaco_answer_generation_transient_stationary': 'Answer Generation',
    'task008_mctaco_wrong_answer_generation_transient_stationary': 'Incorrect Answer Generation',
    'task009_mctaco_question_generation_event_ordering': 'Question Generation',
    'task010_mctaco_answer_generation_event_ordering': 'Answer Generation',
    'task011_mctaco_wrong_answer_generation_event_ordering': 'Incorrect Answer Generation',
    'task012_mctaco_question_generation_absolute_timepoint': 'Question Generation',
    'task013_mctaco_answer_generation_absolute_timepoint': 'Answer Generation',
    'task014_mctaco_wrong_answer_generation_absolute_timepoint': 'Incorrect Answer Generation',
    'task015_mctaco_question_generation_frequency': 'Question Generation',
    'task016_mctaco_answer_generation_frequency': 'Answer Generation',
    'task017_mctaco_wrong_answer_generation_frequency': 'Incorrect Answer Generation',
    'task018_mctaco_temporal_reasoning_presence': 'Classification',
    'task019_mctaco_temporal_reasoning_category': 'Classification',
    'task020_mctaco_span_based_question': 'Classification',
    'task021_mctaco_grammatical_logical': 'Classification',
    'task022_cosmosqa_passage_inappropriate_binary': '',
    'task023_cosmosqa_question_generation': 'Generation',
    'task024_cosmosqa_answer_generation': 'Generation',
    'task025_cosmosqa_incorrect_answer_generation': 'Answer Generation',
    'task026_drop_question_generation': 'Generation',
    'task027_drop_answer_type_generation': 'Classification',
    'task028_drop_answer_generation': 'Generation',
    'task029_winogrande_full_object': 'Text Modification',
    'task030_winogrande_full_person': 'Text Modification',
    'task031_winogrande_question_generation_object': 'Generation',
    'task032_winogrande_question_generation_person': 'Generation',
    'task033_winogrande_answer_generation': 'Generation',
    'task034_winogrande_question_modification_object': 'Text Modification',
    'task035_winogrande_question_modification_person': 'Text Modification',
    'task036_qasc_topic_word_to_generate_related_fact': 'Text Modification',
    'task037_qasc_generate_related_fact': 'Text Modification',
    'task038_qasc_combined_fact': 'Text Modification',
    'task039_qasc_find_overlapping_words': 'Verification',
    'task040_qasc_question_generation': 'Generation',
    'task041_qasc_answer_generation': 'Generation',
    'task042_qasc_incorrect_option_generation': 'Answer Generation',
    'task043_essential_terms_answering_incomplete_questions': 'Answer Generation',
    'task044_essential_terms_identifying_essential_words': 'Verification',
    'task045_miscellaneous_sentence_paraphrasing': 'Text Modification',
    'task046_miscellaenous_question_typing': 'Classification',
    'task047_miscellaenous_answering_science_questions': 'Generation',
    'task048_multirc_question_generation': 'Generation',
    'task049_multirc_questions_needed_to_answer': 'Classification',
    'task050_multirc_answerability': 'Classification',
    'task051_multirc_correct_answer_single_sentence': 'Generation',
    'task052_multirc_identify_bad_question': 'Classification',
    'task053_multirc_correct_bad_question': 'Text Modification',
    'task054_multirc_write_correct_answer': 'Generation',
    'task055_multirc_write_incorrect_answer': 'Answer Generation',
    'task056_multirc_classify_correct_answer': 'Classification',
    'task057_multirc_classify_incorrect_answer': 'Classification',
    'task058_multirc_question_answering': 'Generation',
    'task059_ropes_story_generation': 'Text Modification',
    'task060_ropes_question_generation': 'Generation',
    'task061_ropes_answer_generation': 'Generation',
}
# Not used by the conversion below; kept so the module-level name still exists.
table = []

# Destination directory for the converted task files.
output_dir = '/Users/danielk/ideaProjects/natural-instructions-expansion/tasks/'


def _positive_examples(data):
    """Return the positive examples with their 'reason' stored as 'explanation'."""
    return [
        {'input': x['input'], 'output': x['output'], 'explanation': x['reason']}
        for x in data['Examples']['Positive Examples']
    ]


def _negative_examples(data):
    """Return the negative examples, appending long suggestions to the explanation."""
    examples = []
    for x in data['Examples']['Negative Examples']:
        # Entries that are not JSON objects are skipped rather than converted.
        if not isinstance(x, dict):
            continue
        explanation = x['reason']
        # Only suggestions longer than 10 characters are folded in.
        if 'suggestion' in x and len(x['suggestion']) > 10:
            explanation = explanation + ' ' + x['suggestion']
        examples.append({'input': x['input'], 'output': x['output'], 'explanation': explanation})
    return examples


def _build_task(data, cat):
    """Assemble the converted task dictionary from the original task data."""
    return {
        'Contributors': ["Swaroop Mishra", "Daniel Khashabi"],
        'Categories': [cat],
        # The three instruction fields are merged into one definition string.
        'Definition': data['Definition'] +
                      '\nThings to avoid: ' + data['Things to Avoid'] +
                      '\nEmphasis & Caution: ' + data['Emphasis & Caution'],
        'Positive Examples': _positive_examples(data),
        'Negative Examples': _negative_examples(data),
        # Cap the number of instances copied into the converted file.
        'Instances': data['Instances'][:6500],
    }


# Convert every JSON task file found directly inside tasks_path.
files = [f for f in listdir(tasks_path) if isfile(join(tasks_path, f))]
for source_name in files:
    if not source_name.endswith('.json'):
        continue

    with open(join(tasks_path, source_name), 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Output files use the 'task' prefix and the long "miscellaenous" spelling
    # that the keys of `categories` use.
    task_file = source_name.replace('subtask', 'task').replace('_misc_', '_miscellaenous_')
    # Raises KeyError when the task has no entry in `categories`.
    cat = categories[task_file[:-len('.json')]]
    print(f"{task_file}\t{data['Prompt']}\t{data['Title']}\t{cat}")

    new_data = _build_task(data, cat)

    # The context manager guarantees the output is flushed and closed.
    with open(join(output_dir, task_file), 'w+', encoding='utf-8') as outfile:
        outfile.write(json.dumps(new_data, indent=4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment