Last active
August 29, 2015 14:26
-
-
Save adelavega/a57e7c08ce39c0063292 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""" This script generates stimuli for the keep track task. | |
Counterbalancing rules it tries to implement: | |
- Each category is used as a target equal number of times (only possible when number of targets is divisible by number of categories) | |
- Each word used equally often as a target, distractor, and final word | |
- Last word in the trial is always a distractor | |
- Target words and final words do not repeat across adjacent trials | |
- Distractors can repeat across trials | |
Number of targets per category: | |
- With 3 categories, one category has 1 target, second has 2, third has 3. | |
- With 4 categories, as with 3, but 4th category has 1, 2, or 3 targets (randomly chosen) | |
- With 5: 1, 2, 2, 3, 3 | |
The script tries to generate stimuli according to the above rules. It is very difficult to equate the number of times words are used as targets,
so the script looks for words that are eligible (e.g. words that would not repeat across trials and are in the relevant category),
and chooses from those the word that has been used the least as a target. Thus, they are not always perfectly equated, but they are relatively
evenly distributed. Of course, the ratio of targets to distractors (determined by the length of trials and number of categories) will influence
how often a word is used as a distractor or target.
Sometimes the script will fail to find a word because, by chance, no words meet all the criteria. In this case, it will stop and
tell you. Rerunning the script will often fix this problem, but if it keeps happening it's likely that it's impossible to generate such stimuli.
This happens more often with a lot of 5-category trials, or if trials are too long. Adjusting those parameters will help.
Usage: | |
- Create stimulus generator. Provide with trial structure. In this example, it will generate a set of stimuli with | |
four 3 category lists and three 4 category lists. Each list is 15 words long. | |
sg = StimGen(categories, [3, 3, 3, 3, 4, 4, 4], 15) | |
- Run generate function. Remember to rerun if it stops because it couldn't find words:
sg.generate_stim() | |
- Save stimuli to directory stimuli/ | |
sg.save('stimuli/') | |
Outputs in CSV:
- stimuli.csv - The lists in the order they are used. The final targets are in ALL CAPS.
- all_correct.csv - The last targets for each trial.
- all_targets.csv - The targets (not only last) for each trial
- categories.csv - The categories used for each trial
- counts.csv - The number of times each word was used as a target, distractor, and last target.
Also outputs a JSON file (stim.txt) for use with the online version (that includes categories, last targets and full stimuli lists)
""" | |
import random | |
import pandas as pd | |
import numpy as np | |
import os | |
import json | |
from os import makedirs | |
from os.path import exists | |
# Default word pools for the keep track task: each category name maps to
# the six words that belong to it.
categories = {
    "Animals": ["Dog", "Cat", "Tiger", "Horse", "Lion", "Cow"],
    "Relatives": ["Sister", "Mother", "Brother", "Aunt", "Father", "Uncle"],
    "Distances": ["Mile", "Centimeter", "Inch", "Foot", "Meter", "Yard"],
    "Countries": ["Germany", "Russia", "Canada", "France", "England", "Mexico"],
    "Metals": ["Zinc", "Tin", "Steel", "Iron", "Copper", "Platinum"],
    "Colors": ["Red", "Green", "Blue", "Yellow", "Black", "Orange"],
}
class StimGen(object):
    """Stimulus generator for the keep track task.

    Typical use: construct with a category -> words mapping and the number of
    target categories wanted on each trial, call ``generate_stim()``, then
    ``save()``.

    Attributes set by ``__init__``:
        categories: the mapping passed in (category name -> list of words).
        num_targets: per-trial category counts, randomly ordered but with the
            smallest count first.
        len_lists: number of words in every trial list.
        total_categories: the category names repeated often enough to fill
            every trial.

    Attributes set by ``generate_stim``:
        trial_types: list of category-name lists, one per trial.
        all_targets: the target words chosen for each trial.
        all_stimuli: the ordered word list presented on each trial.
        all_correct: for each trial, the last word shown from each category.
        target_dist_count: DataFrame with one row per word and the usage
            counters ``Target``, ``Distractor`` and ``Last``.
        max_reps: number of usage levels searched when drawing words.
    """

    def __init__(self, categories, num_targets, len_lists=15):
        """ Keep track task stimuli generator. Provide the following:

        categories: a dictionary of category names and items
        num_targets: a list of the number of targets in each trial.
            The list itself is not modified.
        len_lists: How long each trial should be.

        Raises ValueError if ``categories`` or ``num_targets`` is empty.
        """
        if not categories:
            raise ValueError("categories must contain at least one category")
        if not num_targets:
            raise ValueError("num_targets must contain at least one trial")

        n_categories = len(categories)
        n_requested = sum(num_targets)

        # Ceiling division: when the request is not an exact multiple of the
        # number of categories we still need enough names to fill every
        # trial, otherwise the trailing trials could never be built.
        repeats = -(-n_requested // n_categories)
        self.total_categories = list(categories.keys()) * repeats
        self.categories = categories
        self.len_lists = len_lists

        if n_requested % n_categories != 0:
            print('Number of categories request, ' + str(n_requested) + ', must be divisible by total number of categories available, ' + str(n_categories) + ', for each category to be used as a target equally.')

        # Start with the easiest (smallest) trial always; the remaining
        # trials follow in random order. Work on a copy so the caller's list
        # is left untouched.
        remaining = list(num_targets)
        easiest = min(remaining)
        remaining.remove(easiest)
        random.shuffle(remaining)
        self.num_targets = [easiest] + remaining

    def _select_trial_types(self):
        """Assign category names to each trial and store them in ``trial_types``.

        Categories are consumed from the front of ``total_categories``; a
        trial never contains the same category twice.

        Raises ValueError if a trial cannot be filled with distinct
        categories (e.g. it asks for more categories than exist).
        """
        available = list(self.total_categories)
        self.trial_types = []
        for n_wanted in self.num_targets:
            this_trial = []
            for category in available:
                if len(this_trial) == n_wanted:
                    break
                if category not in this_trial:
                    this_trial.append(category)
            if not this_trial or len(this_trial) != n_wanted:
                raise ValueError(
                    "Cannot build a trial with %s distinct categories from "
                    "the categories that are left" % n_wanted)
            # Remove the first remaining occurrence of every category used.
            for category in this_trial:
                available.remove(category)
            self.trial_types.append(this_trial)

    def _draw_word(self, in_pool, column, banned, cumulative):
        """Pick a random eligible word with the lowest usage and count it.

        in_pool: boolean mask over the rows of ``target_dist_count`` marking
            the words that may be considered at all.
        column: name of the usage counter to balance on and to increment.
        banned: words that must not be picked.
        cumulative: when True, usage level ``n`` admits words used fewer than
            ``n`` times; when False it admits words used exactly ``n`` times.

        Levels ``0 .. max_reps - 1`` are tried in order and the first level
        that offers at least one eligible word wins. Returns the chosen word,
        or None if no level offers one.
        """
        counts = self.target_dist_count
        usage = counts[column]
        for num_reps in range(self.max_reps):
            if cumulative:
                at_level = usage < num_reps
            else:
                at_level = usage == num_reps
            eligible = [word for word in counts['Words'][in_pool & at_level]
                        if word not in banned]
            if eligible:
                stim = random.choice(eligible)
                counts.loc[counts['Words'] == stim, column] += 1
                return stim
        return None

    def _choose_targets(self, trial_cats, last_targets=None):
        """Choose the target words for one trial.

        trial_cats: the category names used on this trial (3, 4 or 5).
        last_targets: targets of the previous trial; none of them is reused.

        Targets per category: 1, 2, 3 for three categories; a fourth
        category gets 1, 2 or 3 at random; five categories get 1, 2, 2, 3, 3.
        Within a category the least-used words (``Target`` count) are
        preferred.

        Returns the list of target words, grouped in category order.
        Raises ValueError for an unsupported number of categories and
        RuntimeError when no eligible word is left for some category.
        """
        if last_targets is None:
            last_targets = []

        n_cats = len(trial_cats)
        if n_cats < 3 or n_cats > 5:
            raise ValueError(
                "A trial needs 3, 4 or 5 categories, got %d" % n_cats)

        # Set how many to choose from each category
        if n_cats == 5:
            per_category = [1, 2, 2, 3, 3]
        else:
            per_category = [1, 2, 3]
            if n_cats == 4:
                per_category.append(random.choice([1, 2, 3]))

        trial_targets = []
        words = self.target_dist_count['Words']
        for cat, n_wanted in zip(trial_cats, per_category):
            in_category = words.isin(self.categories[cat])
            for _ in range(n_wanted):
                banned = trial_targets + list(last_targets)
                stim = self._draw_word(in_category, 'Target', banned,
                                       cumulative=False)
                if stim is None:
                    raise RuntimeError("Couldn't find Target")
                trial_targets.append(stim)
        return trial_targets

    def _order_stim(self, trial_cats, targets, last_targets=None):
        """Pick distractors for a trial and arrange the full word list.

        trial_cats: the category names used on this trial.
        targets: the target words already chosen for this trial.
        last_targets: targets of the previous trial; the final word of this
            list will differ from the last of them.

        The list is ``len_lists`` words long, its last word is always a
        distractor, and the order of everything before it is random.
        Orderings whose per-category final words have a low average ``Last``
        count are preferred.

        Returns ``(sequence, correct)`` where ``correct`` holds, for each
        category in ``trial_cats``, the last word of that category in
        ``sequence``.
        Raises ValueError if ``len_lists`` leaves no room for a distractor
        and RuntimeError if distractors or an acceptable order are not found.
        """
        if last_targets is None:
            last_targets = []
        counts = self.target_dist_count

        n_distractors = self.len_lists - len(targets)
        if n_distractors < 1:
            raise ValueError(
                "len_lists must exceed the number of targets so that the "
                "list can end on a distractor")

        ## Words that are in the current categories, and thus can't be distractors
        trial_words = [word for key in self.categories if key in trial_cats
                       for word in self.categories[key]]
        in_pool = ~counts['Words'].isin(trial_words)

        # Draw exactly as many distractors as the list has room for, so the
        # Distractor counter only ever reflects words that are shown.
        distractors = []
        for _ in range(n_distractors):
            stim = self._draw_word(in_pool, 'Distractor',
                                   list(targets) + distractors,
                                   cumulative=True)
            if stim is None:
                raise RuntimeError("Couldn't find Distractor")
            distractors.append(stim)

        # The last word must be a distractor and must not equal the final
        # target of the previous trial. Shuffling cannot change the last
        # word, so resolve a clash by swapping in another distractor.
        final_word = distractors[-1]
        fillers = distractors[:-1]
        if last_targets and final_word == last_targets[-1]:
            if not fillers:
                raise RuntimeError("Couldn't find sequence")
            fillers[-1], final_word = final_word, fillers[-1]

        pool = list(targets) + fillers
        attempts_per_level = 2 * (len(pool) + 1)
        words = counts['Words']
        for num_reps in range(self.max_reps):
            for _ in range(attempts_per_level):
                sequence = random.sample(pool, len(pool)) + [final_word]
                correct = [
                    [word for word in sequence
                     if word in self.categories[cat]][-1]
                    for cat in trial_cats
                ]
                is_correct = words.isin(correct)
                if counts.loc[is_correct, 'Last'].mean() <= num_reps:
                    counts.loc[is_correct, 'Last'] += 1
                    return sequence, correct
        raise RuntimeError("Couldn't find sequence")

    def generate_stim(self, max_reps=7):
        """ Run this to generate the stimuli

        max_reps: number of usage levels searched when drawing a word;
            larger values tolerate more uneven word usage before giving up.

        Resets all results and usage counters, then builds every trial in
        order. Errors raised while building a trial propagate to the caller;
        rerunning may succeed because the search is randomised.
        """
        self.all_targets = []
        self.all_stimuli = []
        self.all_correct = []

        all_words = [word for cat in self.categories
                     for word in self.categories[cat]]
        self.target_dist_count = pd.DataFrame(
            {'Words': all_words, 'Distractor': 0, 'Target': 0, 'Last': 0})
        self.max_reps = max_reps

        self._select_trial_types()

        last_targets = []
        for trial in self.trial_types:
            target_words = self._choose_targets(trial, last_targets)
            all_stim, correct = self._order_stim(
                trial, target_words, last_targets=last_targets)

            self.all_targets.append(target_words)
            self.all_stimuli.append(all_stim)
            self.all_correct.append(correct)
            last_targets = target_words

    def save(self, out_dir='../static/stimuli'):
        """Write the generated stimuli to ``out_dir`` (created if missing).

        Files written: ``stim.txt`` (JSON list of
        ``[categories, correct words, stimuli]`` per trial), ``stimuli.csv``
        (correct words upper-cased), ``all_correct.csv``, ``all_targets.csv``,
        ``categories.csv`` and ``counts.csv``.
        """
        if not exists(out_dir):
            makedirs(out_dir)

        js_data = [[trial, correct, stim] for trial, correct, stim
                   in zip(self.trial_types, self.all_correct, self.all_stimuli)]
        # Context manager so the handle is flushed and closed deterministically.
        with open(os.path.join(out_dir, 'stim.txt'), 'w') as handle:
            json.dump(js_data, handle)

        # Make last word upper case for csv
        csv_all_stim = [
            [word.upper() if word in correct else word for word in stim]
            for stim, correct in zip(self.all_stimuli, self.all_correct)
        ]

        pd.DataFrame(csv_all_stim).T.to_csv(os.path.join(out_dir, 'stimuli.csv'))
        pd.DataFrame(self.all_correct).T.to_csv(os.path.join(out_dir, 'all_correct.csv'))
        pd.DataFrame(self.all_targets).T.to_csv(os.path.join(out_dir, 'all_targets.csv'))
        pd.DataFrame(self.trial_types).T.to_csv(os.path.join(out_dir, 'categories.csv'))
        pd.DataFrame(self.target_dist_count).to_csv(os.path.join(out_dir, 'counts.csv'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment