Skip to content

Instantly share code, notes, and snippets.

@hargup
Created October 19, 2022 03:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hargup/d7b1baa9f64f175541fd0ab71fdecace to your computer and use it in GitHub Desktop.
Save hargup/d7b1baa9f64f175541fd0ab71fdecace to your computer and use it in GitHub Desktop.
Reads a text file and converts it into JSONL data where each line is the prompt for the next line.
import fileinput
import json
# Reads the input via fileinput.input() (files named on the command line, or
# STDIN when none are given) and writes the resulting JSONL to STDOUT.

# Accumulates (prompt, completion) tuples; process() appends to it and the
# end of the script serializes it.
prompt_completion_pairs = []
def process(line, last_line):
    """Record one training pair in ``prompt_completion_pairs``.

    Args:
        line: The current line; stored as the completion.
        last_line: The preceding line; stored as the prompt.
    """
    pair = (last_line, line)
    prompt_completion_pairs.append(pair)
def valid_json(json_str):
    """Return True if ``json_str`` parses as JSON, False otherwise.

    Args:
        json_str: The candidate JSON document, in any form accepted by
            ``json.loads`` (``str``, ``bytes`` or ``bytearray``).

    Returns:
        bool: True when ``json.loads`` accepts the input; False when the
        input is malformed, of an unsupported type, or nested too deeply.
    """
    try:
        json.loads(json_str)
    except (TypeError, ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-string
        # input such as None. Only parse failures are caught so that
        # KeyboardInterrupt/SystemExit still propagate.
        return False
    return True
# Pair every input line with the line that precedes it. The first line has
# no predecessor, so it only ever serves as a prompt and is never recorded
# as a completion of itself.
last_line = None
for line in fileinput.input():
    # Drop the trailing line terminator; the text itself is kept verbatim.
    line = line.rstrip("\n")
    if last_line is not None:
        process(line, last_line)
    last_line = line
def prompt_pairs_to_jsonl(prompt_pairs):
    """Serialize (prompt, completion) pairs as a JSONL string.

    Each usable pair becomes one compact JSON object of the form
    ``{"prompt":"...","completion":"..."}`` on its own line.

    Args:
        prompt_pairs: Iterable of ``(prompt, completion)`` 2-tuples.

    Returns:
        str: The JSON objects joined with newlines (no trailing newline);
        an empty string when no pair is usable.
    """
    records = []
    for prompt, completion in prompt_pairs:
        # A pair with an empty prompt or an empty completion is not a
        # usable record, so it is skipped.
        if not prompt or not completion:
            continue
        # json.dumps handles quoting and escaping, so quotes, backslashes
        # and control characters in the text survive a round trip.
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        records.append(
            json.dumps(
                {"prompt": prompt, "completion": completion},
                ensure_ascii=False,
                separators=(",", ":"),
            )
        )
    return "\n".join(records)
# Serialize the collected pairs and emit them on STDOUT.
jsonl_output = prompt_pairs_to_jsonl(prompt_completion_pairs)
print(jsonl_output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment