Skip to content

Instantly share code, notes, and snippets.

@adelavega
Last active August 29, 2015 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adelavega/a57e7c08ce39c0063292 to your computer and use it in GitHub Desktop.
Save adelavega/a57e7c08ce39c0063292 to your computer and use it in GitHub Desktop.
""" This script generates stimuli for the keep track task.
Counterbalancing rules it tries to implement:
- Each category is used as a target an equal number of times (only possible when the number of targets is divisible by the number of categories)
- Each word used equally often as a target, distractor, and final word
- Last word in the trial is always a distractor
- Target words and final words do not repeat across adjacent trials
- Distractors can repeat across trials
Number of targets per category:
- With 3 categories, one category has 1 target, second has 2, third has 3.
- With 4 categories, as with 3, but 4th category has 1, 2, or 3 targets (randomly chosen)
- With 5: 1, 2, 2, 3, 3
The script tries to generate stimuli according to the above rules. It is very difficult to equate the number of times words are used as targets,
so the script looks for words that are eligible (e.g. words that would not repeat across trials, are in the relevant category, etc.),
and chooses from those the word that has been used the least as a target. Thus, they are not always perfectly equated, but they are relatively
evenly distributed. Of course, the ratio of targets to distractors (determined by the length of trials and number of categories) will influence
how often a word is used as a distractor or target.
Sometimes, the script will fail to find a word because, by chance, there are no words that meet all the criteria. In this case, it will stop and
tell you. Rerunning the script will often fix this problem, but if it keeps happening it's likely that it is impossible to generate such stimuli.
This happens more often with a lot of 5 category trials, or if trials are too long. Adjusting those parameters will help.
Usage:
- Create stimulus generator. Provide with trial structure. In this example, it will generate a set of stimuli with
four 3 category lists and three 4 category lists. Each list is 15 words long.
sg = StimGen(categories, [3, 3, 3, 3, 4, 4, 4], 15)
- Run generate function. Remember to rerun if it stops because it couldn't find words:
sg.generate_stim()
- Save stimuli to directory stimuli/
sg.save('stimuli/')
Outputs in CSV:
- stimuli.csv - The lists in the order they are used. In all CAPS are the final targets.
- all_correct.csv - The last targets for each trial.
- all_targets.csv - The targets (not only last) for each trial
- categories.csv - The categories used for each trial
- counts.csv - The number of times each word was used as a target, as a distractor, and as a last target
Also outputs a JSON file (stim.txt) for use with the online version (that includes categories, last targets and full stimuli lists)
"""
import random
import pandas as pd
import numpy as np
import os
import json
from os import makedirs
from os.path import exists
# Default word lists for the task: six categories with six words each.
categories = {
    "Animals": ["Dog", "Cat", "Tiger", "Horse", "Lion", "Cow"],
    "Relatives": ["Sister", "Mother", "Brother", "Aunt", "Father", "Uncle"],
    "Distances": ["Mile", "Centimeter", "Inch", "Foot", "Meter", "Yard"],
    "Countries": ["Germany", "Russia", "Canada", "France", "England", "Mexico"],
    "Metals": ["Zinc", "Tin", "Steel", "Iron", "Copper", "Platinum"],
    "Colors": ["Red", "Green", "Blue", "Yellow", "Black", "Orange"],
}
class StimGen():
    """Keep track task stimuli generator.

    Typical use: construct with the word categories and the trial structure,
    call ``generate_stim()``, then ``save(out_dir)``.

    Attributes set by ``generate_stim``:
        trial_types: list of category-name lists, one per trial.
        all_targets: list of target-word lists, one per trial.
        all_stimuli: list of full word sequences, one per trial.
        all_correct: per trial, the last word of each trial category in the
            sequence (same order as the trial's categories).
        target_dist_count: DataFrame with one row per word and the columns
            'Words', 'Target', 'Distractor' and 'Last' (usage counts).
    """

    def __init__(self, categories, num_targets, len_lists = 15):
        """ Keep track task stimuli generator. Provide the following:
        categories: a dictionary of category names and items
        num_targets: a list of the number of target categories in each trial
            (3, 4 or 5). The list is copied; the caller's list is not modified.
        len_lists: How long each trial should be.
        """
        if not categories:
            raise ValueError('At least one category is required.')

        total_requested = sum(num_targets)
        n_categories = len(categories)

        # Pool of category names that trials draw from. Rounded up so that
        # every trial can be filled even when the total is not divisible
        # (categories are then used slightly unequally, as warned below).
        n_cycles = -(-total_requested // n_categories)
        self.total_categories = list(categories) * n_cycles
        self.categories = categories
        self.len_lists = len_lists

        if total_requested % n_categories != 0:
            print('Number of categories request, ' + str(total_requested) + ', must be divisible by total number of categories available, ' + str(n_categories) + ', for each category to be used as a target equally.')

        # Randomise the trial order, but always start with the easiest
        # (smallest) trial. Work on a copy so the caller's list is untouched.
        remaining = list(num_targets)
        if remaining:
            easiest = min(remaining)
            remaining.remove(easiest)
            random.shuffle(remaining)
            remaining.insert(0, easiest)
        self.num_targets = remaining

    def _select_trial_types(self):
        """Assign categories to every trial and store them in self.trial_types.

        Each trial takes the first distinct categories still left in the pool
        (self.total_categories), which are then removed from the pool.

        Raises:
            ValueError: if the pool cannot supply enough distinct categories
                for a trial.
        """
        pool = list(self.total_categories)
        self.trial_types = []
        for n_t in self.num_targets:
            this_trial = []
            for category in pool:
                if category not in this_trial:
                    this_trial.append(category)
                if len(this_trial) == n_t:
                    break
            if len(this_trial) != n_t:
                raise ValueError("Not enough distinct categories left for a trial with " + str(n_t) + " categories")
            for item in this_trial:
                pool.remove(item)
            self.trial_types.append(this_trial)

    def _targets_per_category(self, n_cats):
        """Return how many targets to draw from each category of a trial."""
        if n_cats == 3:
            return [1, 2, 3]
        if n_cats == 4:
            # The fourth category gets a randomly chosen number of targets.
            return [1, 2, 3, random.choice([1, 2, 3])]
        if n_cats == 5:
            return [1, 2, 2, 3, 3]
        raise ValueError("Trials must have 3, 4 or 5 categories, got " + str(n_cats))

    def _pick_word(self, candidates, excluded):
        """Return a random word from candidates that is not excluded, or None."""
        eligible = [word for word in candidates if word not in excluded]
        if not eligible:
            return None
        return random.choice(eligible)

    def _choose_targets(self, trial_cats, last_targets = None):
        """Choose the target words for one trial.

        trial_cats: the category names used in this trial.
        last_targets: targets of the previous trial; these are not reused.

        Words with the lowest 'Target' count are preferred. The chosen words'
        'Target' counts are incremented in self.target_dist_count.

        Raises:
            ValueError: if the trial does not have 3, 4 or 5 categories.
            Exception: if no eligible word exists for some category.
        """
        previous = set(last_targets) if last_targets else set()
        per_category = self._targets_per_category(len(trial_cats))

        # One entry per target to draw, naming the category it comes from.
        choose_cats = []
        for cat, n_words in zip(trial_cats, per_category):
            choose_cats += [cat] * n_words

        counts = self.target_dist_count
        trial_targets = []
        for cat in choose_cats:
            in_category = counts.Words.isin(self.categories[cat])
            excluded = previous.union(trial_targets)
            # Try words never used as a target first, then used once, etc.
            for num_reps in range(self.max_reps):
                avail_stim = counts.Words[in_category & (counts.Target == num_reps)]
                stim = self._pick_word(avail_stim, excluded)
                if stim is not None:
                    trial_targets.append(stim)
                    counts.loc[counts.Words == stim, 'Target'] += 1
                    break
            else:
                raise Exception("Couldn't find Target")
        return trial_targets

    def _order_stim(self, trial_cats, targets, last_targets = None):
        """Pick distractors and order the full word sequence for one trial.

        trial_cats: the category names used in this trial.
        targets: the target words chosen for this trial.
        last_targets: targets of the previous trial (may be empty).

        Returns (sequence, correct): sequence has self.len_lists words and
        always ends with a distractor; correct holds, for each trial category,
        the last word of that category in the sequence.

        Distractors come from categories not used in the trial. 'Distractor'
        is incremented for every distractor placed in the sequence, and 'Last'
        for every word in correct.

        Raises:
            ValueError: if the list is too short to hold a distractor.
            Exception: if no distractor or no acceptable ordering is found.
        """
        n_distractors = self.len_lists - len(targets)
        if n_distractors < 1:
            raise ValueError("len_lists must exceed the number of targets so the list can end with a distractor")

        counts = self.target_dist_count

        ## Words that are in the current categories, and thus can't be distractors
        words_not = [word for cat in trial_cats for word in self.categories[cat]]
        outside_trial = ~counts.Words.isin(words_not)

        random_distractors = []
        for _ in range(n_distractors):
            excluded = set(targets).union(random_distractors)
            # Prefer the least used distractors: widen the allowed count step by step.
            for num_reps in range(self.max_reps):
                avail_stim = counts.Words[outside_trial & (counts.Distractor < num_reps)]
                stim = self._pick_word(avail_stim, excluded)
                if stim is not None:
                    random_distractors.append(stim)
                    counts.loc[counts.Words == stim, 'Distractor'] += 1
                    break
            else:
                raise Exception("Couldn't find Distractor")

        # The closing word must not equal the last target of the previous trial.
        forbidden_final = last_targets[-1] if last_targets else None
        final_options = [word for word in random_distractors if word != forbidden_final]
        if not final_options:
            raise Exception("Couldn't find sequence")

        max_tries = self.len_lists * 2
        # Accept the first ordering whose final targets have a low enough mean
        # 'Last' count, relaxing the threshold one step at a time.
        for num_reps in range(self.max_reps):
            for _ in range(max_tries):
                final_word = random.choice(final_options)
                body = targets + [word for word in random_distractors if word != final_word]
                random.shuffle(body)
                sequence = body + [final_word]
                correct = [[word for word in sequence if word in self.categories[cat]][-1] for cat in trial_cats]
                avg_last = counts.Last[counts.Words.isin(correct)].mean()
                if avg_last <= num_reps:
                    for word in correct:
                        counts.loc[counts.Words == word, 'Last'] += 1
                    return sequence, correct
        raise Exception("Couldn't find sequence")

    def generate_stim(self, max_reps=7):
        """ Run this to generate the stimuli

        max_reps: number of usage-count levels tried when looking for a word
            or an ordering before giving up.

        Resets all results and counts, then builds every trial in order.
        Raises Exception if no word or ordering meeting the rules is found;
        rerunning may succeed because the selection is random.
        """
        ## Put it all together
        self.all_targets = []
        self.all_stimuli = []
        self.all_correct = []

        all_words = [word for cat in self.categories for word in self.categories[cat]]
        self.target_dist_count = pd.DataFrame({'Words': all_words, 'Distractor' : 0, 'Target': 0, 'Last': 0})
        self.max_reps = max_reps

        self._select_trial_types()

        for trial in self.trial_types:
            # Targets of the preceding trial (none for the first trial).
            last_targets = self.all_targets[-1] if self.all_targets else []
            target_words = self._choose_targets(trial, last_targets)
            all_stim, correct = self._order_stim(trial, target_words, last_targets=last_targets)
            self.all_targets.append(target_words)
            self.all_stimuli.append(all_stim)
            self.all_correct.append(correct)

    def save(self, out_dir = '../static/stimuli'):
        """Write the generated stimuli to out_dir (created if missing).

        Files written: stim.txt (JSON list of [categories, last targets,
        sequence] per trial), stimuli.csv, all_correct.csv, all_targets.csv,
        categories.csv and counts.csv. Call generate_stim() first.
        """
        if not exists(out_dir):
            makedirs(out_dir)

        js_data = [[trial, self.all_correct[i], self.all_stimuli[i]] for i, trial in enumerate(self.trial_types)]
        with open(os.path.join(out_dir, 'stim.txt'), 'w') as handle:
            json.dump(js_data, handle)

        # Make last word upper case for csv
        csv_all_stim = [[word.upper() if word in self.all_correct[n] else word for word in stim] for n, stim in enumerate(self.all_stimuli)]
        pd.DataFrame(csv_all_stim).T.to_csv(os.path.join(out_dir, 'stimuli.csv'))
        pd.DataFrame(self.all_correct).T.to_csv(os.path.join(out_dir, 'all_correct.csv'))
        pd.DataFrame(self.all_targets).T.to_csv(os.path.join(out_dir, 'all_targets.csv'))
        pd.DataFrame(self.trial_types).T.to_csv(os.path.join(out_dir, 'categories.csv'))
        pd.DataFrame(self.target_dist_count).to_csv(os.path.join(out_dir, 'counts.csv'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment