Skip to content

Instantly share code, notes, and snippets.

@danyaljj
Created July 12, 2021 22:50
Show Gist options
  • Star 0 — you must be signed in to star a gist.
  • Fork 0 — you must be signed in to fork a gist.
  • Save danyaljj/160ed0844d74b6ead428d30bd7975388 to your computer and use it in GitHub Desktop.
Save danyaljj/160ed0844d74b6ead428d30bd7975388 to your computer and use it in GitHub Desktop.
convert.py
import json
from os import listdir
from os.path import isfile, join
# Directory that is scanned for the source task JSON files (hard-coded
# absolute path).
tasks_path = '/Users/danielk/ideaProjects/instructions-demo/app/app/static/tasks/'
# Maps a task name (the task file name without its ".json" extension, after
# the conversion loop has renamed it) to the category label that is written
# into the converted file's 'Categories' list. A task name that is missing
# from this dict makes the lookup in the conversion loop raise KeyError.
categories = {
'task001_quoref_question_generation': 'Generation',
'task002_quoref_answer_generation': 'Generation',
'task003_mctaco_question_generation_event_duration': 'Question Generation',
'task004_mctaco_answer_generation_event_duration': 'Answer Generation',
'task005_mctaco_wrong_answer_generation_event_duration': 'Incorrect Answer Generation',
'task006_mctaco_question_generation_transient_stationary': 'Question Generation',
'task007_mctaco_answer_generation_transient_stationary': 'Answer Generation',
'task008_mctaco_wrong_answer_generation_transient_stationary': 'Incorrect Answer Generation',
'task009_mctaco_question_generation_event_ordering': 'Question Generation',
'task010_mctaco_answer_generation_event_ordering': 'Answer Generation',
'task011_mctaco_wrong_answer_generation_event_ordering': 'Incorrect Answer Generation',
'task012_mctaco_question_generation_absolute_timepoint': 'Question Generation',
'task013_mctaco_answer_generation_absolute_timepoint': 'Answer Generation',
'task014_mctaco_wrong_answer_generation_absolute_timepoint': 'Incorrect Answer Generation',
'task015_mctaco_question_generation_frequency': 'Question Generation',
'task016_mctaco_answer_generation_frequency': 'Answer Generation',
'task017_mctaco_wrong_answer_generation_frequency': 'Incorrect Answer Generation',
'task018_mctaco_temporal_reasoning_presence': 'Classification',
'task019_mctaco_temporal_reasoning_category': 'Classification',
'task020_mctaco_span_based_question': 'Classification',
'task021_mctaco_grammatical_logical': 'Classification',
# NOTE(review): empty category label -- the converted file will contain
# 'Categories': ['']. Confirm the intended label for this task.
'task022_cosmosqa_passage_inappropriate_binary': '',
'task023_cosmosqa_question_generation': 'Generation',
'task024_cosmosqa_answer_generation': 'Generation',
'task025_cosmosqa_incorrect_answer_generation': 'Answer Generation',
'task026_drop_question_generation': 'Generation',
'task027_drop_answer_type_generation': 'Classification',
'task028_drop_answer_generation': 'Generation',
'task029_winogrande_full_object': 'Text Modification',
'task030_winogrande_full_person': 'Text Modification',
'task031_winogrande_question_generation_object': 'Generation',
'task032_winogrande_question_generation_person': 'Generation',
'task033_winogrande_answer_generation': 'Generation',
'task034_winogrande_question_modification_object': 'Text Modification',
'task035_winogrande_question_modification_person': 'Text Modification',
'task036_qasc_topic_word_to_generate_related_fact': 'Text Modification',
'task037_qasc_generate_related_fact': 'Text Modification',
'task038_qasc_combined_fact': 'Text Modification',
'task039_qasc_find_overlapping_words': 'Verification',
'task040_qasc_question_generation': 'Generation',
'task041_qasc_answer_generation': 'Generation',
'task042_qasc_incorrect_option_generation': 'Answer Generation',
'task043_essential_terms_answering_incomplete_questions': 'Answer Generation',
'task044_essential_terms_identifying_essential_words': 'Verification',
# NOTE(review): this key is spelled "miscellaneous" while the next two keys
# are spelled "miscellaenous". The conversion loop rewrites '_misc_' to
# '_miscellaenous_', so a task045 file named with '_misc_' would not match
# this key -- verify against the actual file names.
'task045_miscellaneous_sentence_paraphrasing': 'Text Modification',
'task046_miscellaenous_question_typing': 'Classification',
'task047_miscellaenous_answering_science_questions': 'Generation',
'task048_multirc_question_generation': 'Generation',
'task049_multirc_questions_needed_to_answer': 'Classification',
'task050_multirc_answerability': 'Classification',
'task051_multirc_correct_answer_single_sentence': 'Generation',
'task052_multirc_identify_bad_question': 'Classification',
'task053_multirc_correct_bad_question': 'Text Modification',
'task054_multirc_write_correct_answer': 'Generation',
'task055_multirc_write_incorrect_answer': 'Answer Generation',
'task056_multirc_classify_correct_answer': 'Classification',
'task057_multirc_classify_incorrect_answer': 'Classification',
'task058_multirc_question_answering': 'Generation',
'task059_ropes_story_generation': 'Text Modification',
'task060_ropes_question_generation': 'Generation',
'task061_ropes_answer_generation': 'Generation'
}
# Conversion loop: read every task JSON file in ``tasks_path``, reshape it
# into the new task layout, and write it to ``output_dir`` under its renamed
# file name. One tab-separated summary line is printed per converted file.
#
# Raises (uncaught, aborting the run):
#   KeyError  - the renamed task has no entry in ``categories``, or the
#               source JSON lacks one of the keys read below.
#   OSError   - a source file cannot be read or an output file cannot be
#               written.

table = []  # kept from the original script; nothing in the visible code reads it

# Destination directory for the converted task files (hard-coded absolute path).
output_dir = '/Users/danielk/ideaProjects/natural-instructions-expansion/tasks/'
# Upper bound on the number of instances copied into each converted file.
max_instances = 6500

files = [f for f in listdir(tasks_path) if isfile(join(tasks_path, f))]
for file in files:
    # Test the extension itself; a substring test would also accept names
    # such as "foo.json.bak".
    if not file.endswith('.json'):
        continue

    # Close the input as soon as it has been parsed.
    with open(join(tasks_path, file), 'r', encoding='utf-8') as f:
        data = json.load(f)

    examples = data['Examples']

    # Positive examples: rename 'reason' to 'explanation'.
    positive_examples = [
        {'input': x['input'], 'output': x['output'], 'explanation': x['reason']}
        for x in examples['Positive Examples']
    ]

    # Negative examples: entries that are not dicts are skipped, and a
    # 'suggestion' longer than 10 characters is appended to the explanation.
    negative_examples = []
    for x in examples['Negative Examples']:
        if not isinstance(x, dict):
            continue
        explanation = x['reason']
        if 'suggestion' in x and len(x['suggestion']) > 10:
            explanation = explanation + ' ' + x['suggestion']
        negative_examples.append(
            {'input': x['input'], 'output': x['output'], 'explanation': explanation}
        )

    # Rename the file to the naming scheme used by the keys of ``categories``.
    file = file.replace('subtask', 'task').replace('_misc_', '_miscellaenous_')
    # The name is known to end with '.json' (checked above); drop only that suffix.
    cat = categories[file[:-len('.json')]]
    print(f"{file}\t{data['Prompt']}\t{data['Title']}\t{cat}")

    new_data = {
        'Contributors': ["Swaroop Mishra", "Daniel Khashabi"],
        'Categories': [cat],
        # The three instruction fields are folded into a single definition.
        'Definition': data['Definition'] +
                      '\nThings to avoid: ' + data['Things to Avoid'] +
                      '\nEmphasis & Caution: ' + data['Emphasis & Caution'],
        'Positive Examples': positive_examples,
        'Negative Examples': negative_examples,
        'Instances': data['Instances'][:max_instances],
    }

    # The context manager guarantees the output is flushed and closed; the
    # original left the handle open.
    with open(join(output_dir, file), 'w', encoding='utf-8') as outfile:
        outfile.write(json.dumps(new_data, indent=4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.