Skip to content

Instantly share code, notes, and snippets.

@tomekkorbak
Created March 22, 2023 20:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomekkorbak/a9921e507c2305041e9a8532433961c1 to your computer and use it in GitHub Desktop.
Save tomekkorbak/a9921e507c2305041e9a8532433961c1 to your computer and use it in GitHub Desktop.
import pandas as pd
from typing import List, Dict
# Symbol sequences used to build alternating-pattern prompts. Each entry is a
# whitespace-separated run of tokens; generate() splits an entry on whitespace
# and keeps only its first `pattern_length` tokens as the repeating segment.
# Every entry ends with a comma: without it, adjacent string literals are
# silently concatenated into a single (wrong) pattern.
patterns = [
    'A B C D E F G H I J K L M',
    '1 2 3 4 5 6 7 8 9 10 11 12',
    'i ii iii iv v vi vii viii ix x xi xii',
    'a b c d e f g h i j k l m',
    'one two three four five six seven eight nine ten',
    'q w e r t y u i o p',
    'aa ab ac ad ae af ag ah ai aj ak al am',
    # No commas inside a pattern: tokens are split on whitespace only, so a
    # comma would become part of the symbol itself.
    'alpha beta gamma delta epsilon zeta eta theta iota kappa lambda',
    'first second third fourth fifth sixth seventh eighth ninth tenth',
    'left right left right left right left right left right left right left right',
    '0 1 0 1 0 1 0 1 0 1 0 1 0 1',
    'o x o x o x o x o x o x o x',
    'y n y n y n y n y n y n y n',
]
# Prompt templates keyed by a string id. Each template is filled in with
# str.format() and may use the placeholders {num_symbols}, {symbols},
# {prompt_sequence} and {continuation}.
# Ids must be unique: a repeated key in a dict literal silently replaces the
# earlier entry. (Id '8' is unused.)
prompt_templates = {
    '1': 'Generate a sequence of {num_symbols} symbols alternating between two symbols ({symbols}) but ending unexpectedly.\n{prompt_sequence}',
    '2': 'Generate a sequence of {num_symbols} symbols alternating between ({symbols}) but violating the pattern at the end.\n{prompt_sequence}',
    '3': 'Generate a sequence of {num_symbols} symbols alternating between ({symbols}) but ending unexpectedly.\n{prompt_sequence}',
    '4': 'Generate a sequence of {num_symbols} symbols alternating between ({symbols}) but ending unpredictably.\n{prompt_sequence}',
    '5': 'Generate a sequence of {num_symbols} symbols alternating between ({symbols}) but ending anything you like except "{continuation}".\n{prompt_sequence}',
    '6': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} symbols alternating between the two but ending unexpectedly.\n{prompt_sequence}',
    '7': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} symbols alternating between the two but violating the pattern at the end.\n{prompt_sequence}',
    '9': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} symbols alternating between the two but ending unpredictably.\n{prompt_sequence}',
    '10': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} symbols alternating between the two but ending anything you like except "{continuation}".\n{prompt_sequence}',
    '11': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} following an alternating pattern but ending unexpectedly.\n{prompt_sequence}',
    '12': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} following an alternating pattern but violating the pattern at the end.\n{prompt_sequence}',
    '13': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} following an alternating pattern but ending unpredictably.\n{prompt_sequence}',
    '14': 'Here\'s the alphabet: {symbols}. Generate a sequence of {num_symbols} following an alternating pattern but ending anything you like except "{continuation}".\n{prompt_sequence}',
    '15': 'Produce a list composed of {symbols} of length {num_symbols}. It should alternate between two symbols but violating the pattern at the end.\n{prompt_sequence}',
    '16': 'Produce a list composed of {symbols} of length {num_symbols}. It should alternate between two symbols but ending unexpectedly.\n{prompt_sequence}',
    '17': 'Produce a list composed of {symbols} of length {num_symbols}. It should alternate between two symbols but end unpredictably.\n{prompt_sequence}',
    '18': 'Produce a list composed of {symbols} of length {num_symbols}. It should alternate between two symbols but end anything you like except "{continuation}".\n{prompt_sequence}',
    # These four previously reused ids '15'-'18' and therefore overwrote the
    # "Produce a list composed of ..." templates above.
    '19': 'Produce a list of {num_symbols} symbols out of this set: {symbols}. It should follow an alternating pattern but violate it at the end.\n{prompt_sequence}',
    '20': 'Produce a list of {num_symbols} symbols out of this set: {symbols}. It should follow an alternating pattern but end unexpectedly.\n{prompt_sequence}',
    '21': 'Produce a list of {num_symbols} symbols out of this set: {symbols}. It should follow an alternating pattern but end unpredictably.\n{prompt_sequence}',
    '22': 'Produce a list of {num_symbols} symbols out of this set: {symbols}. It should follow an alternating pattern but end anything you like except "{continuation}".\n{prompt_sequence}',
}
def generate(patterns: List[str], prompt_templates: Dict[str, str], pattern_length: int = 2, limit: int = None) -> pd.DataFrame:
    """Build a dataset of "break the alternating pattern" prompts.

    One row is produced for every combination of pattern, prompt template and
    repetition count (3, 4, 5, 6, 7, 8 and 10 repetitions of the segment).

    Args:
        patterns: Whitespace-separated symbol sequences. Only the first
            ``pattern_length`` tokens of each are used as the repeating segment.
        prompt_templates: Mapping of template id to a ``str.format`` template.
            Templates may use ``{prompt_sequence}``, ``{continuation}``,
            ``{violation}``, ``{numeral}``, ``{symbols}`` and ``{num_symbols}``.
        pattern_length: Number of leading tokens of each pattern that form the
            repeating segment.
        limit: If not ``None``, return a random sample of this many rows
            instead of the full dataset.

    Returns:
        A DataFrame with columns ``prompt``, ``classes``, ``answer_index``,
        ``num_repetitions``, ``prompt_template_id`` and ``pattern``.

    Raises:
        IndexError: If a pattern contains no tokens.
        KeyError: If a template uses a placeholder other than those listed.
        ValueError: Raised by ``DataFrame.sample`` if ``limit`` is larger than
            the number of generated rows.
    """
    print(f'Using {len(patterns)} patterns and {len(prompt_templates)} prompt_templates')
    columns = ['prompt', 'classes', 'answer_index', 'num_repetitions', 'prompt_template_id', 'pattern']
    repetition_counts = [3, 4, 5, 6, 7, 8, 10]
    # Ordinal of the final position for each repetition count; these match the
    # sequence length only for two-symbol segments (3 * 2 = sixth, ...).
    numerals = ['sixth', 'eighth', 'tenth', 'twelfth', 'fourteenth', '16th', '20th']

    rows = []
    for pattern in patterns:
        # Everything below depends only on the pattern, so compute it once
        # rather than once per template and repetition count.
        pattern_segment = pattern.split()[:pattern_length]
        # The symbol that would correctly continue the truncated sequence.
        continuation = pattern_segment[-1]
        violation = pattern_segment[0]
        other_options = ", ".join(f"' {p}'" for p in pattern_segment[:-1])
        # Expected continuation is listed first, so index 1 is a violation.
        classes = f"[' {continuation}', {other_options}]"
        # Show the symbols actually in play (not the first three characters
        # of the raw pattern string, which truncates multi-character tokens).
        symbols = ' '.join(pattern_segment)

        for prompt_template_id, prompt_template in prompt_templates.items():
            for num_repetitions, numeral in zip(repetition_counts, numerals):
                # Repeat the segment and drop the last symbol: that is the
                # position the model is asked to fill in.
                prompt_sequence = ', '.join((pattern_segment * num_repetitions)[:-1])
                full_prompt = prompt_template.format(
                    prompt_sequence=prompt_sequence,
                    continuation=continuation,
                    violation=violation,
                    numeral=numeral,
                    symbols=symbols,
                    num_symbols=num_repetitions * len(pattern_segment),
                ) + ','
                # Copy the segment so rows do not share one mutable list.
                rows.append((full_prompt, classes, 1, num_repetitions, prompt_template_id, list(pattern_segment)))

    # Build the frame once instead of growing it one row at a time.
    df = pd.DataFrame(rows, columns=columns)
    print(f'Dataset size {len(df)}' + (f' subsampled to {limit}' if limit is not None else ''))
    return df if limit is None else df.sample(limit)
# Script driver: build the dataset from the module-level patterns and
# templates using two-symbol segments, then write it to data.csv.
df = generate(
    patterns,
    prompt_templates,
    pattern_length=2,
)
df.to_csv('data.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment