Skip to content

Instantly share code, notes, and snippets.

@FoobarProtocol
Created October 21, 2023 23:46
Show Gist options
  • Save FoobarProtocol/cc7c2fa8e544bcf3b2c028fd61799a15 to your computer and use it in GitHub Desktop.
Save FoobarProtocol/cc7c2fa8e544bcf3b2c028fd61799a15 to your computer and use it in GitHub Desktop.
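This brief piece of code outlines how to convert an Alpaca-format instruction dataset (Instruction/Input/Output) to the Evol-Instruct format (Instruction/Output).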
def convert_alpaca_to_evol(
    file_path: str,
    lines: bool = False,
    output_file: str = "converted_alpaca.json"
):
    """Convert an Alpaca Instruct dataset to the Evol-Instruct format.

    Alpaca records carry Instruction/Input/Output; Evol-Instruct records
    carry only Instruction/Output. A non-empty input is appended to its
    instruction, separated by a newline.

    Args:
        file_path: Path to a single JSON file holding a list of records
            with "instruction", "input" and "output" keys, or, when
            ``lines`` is True, a JSONL file whose records hold
            "instruction" plus an "instances" list; only the first
            instance's "input"/"output" is used.
        lines: Set to True if the input is a JSONL file. Defaults to False.
        output_file: Path the converted list is written to as JSON.

    Returns:
        A list of ``{"instruction": ..., "output": ...}`` dicts, identical
        to what is written to ``output_file``.
    """
    # Function-scope import keeps the block self-contained: the visible
    # snippet uses ``json`` without a module-level import.
    import json

    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(file_path, "r", encoding="utf-8") as json_file:
        if lines:
            # Skip blank lines (e.g. a trailing newline) instead of crashing.
            records = [json.loads(line) for line in json_file if line.strip()]
        else:
            records = json.load(json_file)

    result = []
    for record in records:
        # JSONL records nest input/output under the first "instances" entry;
        # plain JSON records hold them at the top level.
        fields = record["instances"][0] if lines else record
        instruction = record["instruction"]
        # A missing "input" key is treated the same as an empty input.
        extra_input = fields.get("input")
        if extra_input:
            instruction += "\n" + extra_input
        result.append({
            "instruction": instruction,
            "output": fields["output"],
        })

    with open(output_file, "w", encoding="utf-8") as fp:
        json.dump(result, fp)
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment