@Vivraan
Created June 26, 2023 01:11
ChatGPT's best attempt at the two-stage reinforcement learning task analysis from Daw et al. (2011).
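
The model consumes trial-by-trial choice and reward data. hybrid_algorithm below indexes it as data[trial][step] -> (first-stage choice, second-stage choice, reward); a minimal sketch of that assumed layout, with hypothetical example values:

    # Assumed nested layout for hybrid_algorithm (example values are hypothetical):
    # data[trial][step] -> (first_stage_choice, second_stage_choice, reward)
    example_data = [
        [(0, 1, 1.0)],  # trial 0: first-stage action 0, second-stage action 1, rewarded
        [(1, 0, 0.0)],  # trial 1: first-stage action 1, second-stage action 0, no reward
    ]
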
import torch
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd


class LearningModel:
    def __init__(self, num_first_stage_choices=2, num_second_stage_choices=2, num_trials=2, num_epochs=1000):
        # Task dimensions stay plain ints: torch.zeros() and range() expect
        # ints, not tensors
        self.num_first_stage_choices = num_first_stage_choices
        self.num_second_stage_choices = num_second_stage_choices
        self.num_trials = num_trials
        self.num_epochs = num_epochs
        # Move the parameter tensors to the GPU if available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Free parameters of the hybrid model: learning rate (alpha),
        # eligibility-trace decay (lambda), softmax inverse temperature (beta),
        # choice perseveration (stay_bias), and the model-based/model-free
        # mixing weight (w). requires_grad=True so fit() can optimize them.
        self.alpha = torch.tensor(0.1, device=self.device, requires_grad=True)
        self.lambda_val = torch.tensor(0.5, device=self.device, requires_grad=True)
        self.beta = torch.tensor(1.0, device=self.device, requires_grad=True)
        self.stay_bias = torch.tensor(0.2, device=self.device, requires_grad=True)
        self.weight = torch.tensor(0.5, device=self.device, requires_grad=True)

    def hybrid_algorithm(self, data):
        # Fixed common/rare transition structure of the two-stage task
        transition_probs = torch.tensor([[0.7, 0.3], [0.3, 0.7]],
                                        dtype=torch.float32, device=self.device)
        # Initialize first-stage and second-stage stimulus values
        first_stage_values = torch.zeros(self.num_first_stage_choices,
                                         dtype=torch.float32, device=self.device)
        second_stage_values = torch.zeros((self.num_first_stage_choices, self.num_second_stage_choices),
                                          dtype=torch.float32, device=self.device)
        for trial in range(self.num_trials):
            # Reset eligibility trace at the beginning of each trial
            eligibility_trace = torch.zeros((self.num_first_stage_choices, self.num_second_stage_choices),
                                            dtype=torch.float32, device=self.device)
            # Initialize first-stage and second-stage choices
            first_stage_choice = 0
            second_stage_choice = 0
            for step in range(len(data[trial])):
                # Read the choices made on the first step of the trial
                if step == 0:
                    first_stage_choice = int(data[trial][step][0])
                    second_stage_choice = int(data[trial][step][1])
                # Model-based prediction error: reward minus the transition-weighted
                # expectation of the best second-stage option in each state
                reward = data[trial][step][2]
                expected_reward = (transition_probs[first_stage_choice][0] * second_stage_values[0].max() +
                                   transition_probs[first_stage_choice][1] * second_stage_values[1].max())
                prediction_error_model_based = reward - expected_reward
                # SARSA(lambda) update of second-stage values; updates are kept
                # out-of-place so the autograd graph stays intact for fit()
                prediction_error_sarsa = reward - second_stage_values[first_stage_choice][second_stage_choice]
                trace_increment = torch.zeros_like(eligibility_trace)
                trace_increment[first_stage_choice, second_stage_choice] = 1.0
                eligibility_trace = eligibility_trace + trace_increment
                second_stage_values = second_stage_values + self.alpha * prediction_error_sarsa * eligibility_trace
                eligibility_trace = eligibility_trace * self.lambda_val
                # Update first-stage stimulus values with the weighted hybrid error
                stay_bias_term = self.stay_bias * first_stage_values[first_stage_choice]
                w = self.weight
                prediction_error_hybrid = w * prediction_error_model_based + (1 - w) * prediction_error_sarsa
                first_stage_update = torch.zeros_like(first_stage_values)
                first_stage_update[first_stage_choice] = self.alpha * prediction_error_hybrid - stay_bias_term
                first_stage_values = first_stage_values + first_stage_update
            # Update second-stage values after the last step in the trial
            prediction_error = 0 - second_stage_values[first_stage_choice][second_stage_choice]
            second_stage_values = second_stage_values + self.alpha * prediction_error * eligibility_trace
        # Calculate action probabilities using the softmax choice rule
        first_stage_action_probs = F.softmax(self.beta * first_stage_values, dim=0)
        return first_stage_action_probs, second_stage_values

    def log_likelihood(self, data) -> torch.Tensor:
        # TODO Calculate the log-likelihood of the data given the model parameters
        # hint: use the hybrid algorithm to calculate the likelihood.
        pass

    def log_prior(self) -> torch.Tensor:
        # TODO Calculate the log-prior of the model parameters (if using a Bayesian approach)
        # hint: use the prior distribution of the parameters to calculate the log-prior.
        pass

    def log_posterior(self, data) -> torch.Tensor:
        # Calculate the log-posterior (log-likelihood + log-prior) of the model parameters
        log_likelihood = self.log_likelihood(data)
        log_prior = self.log_prior()
        return log_likelihood + log_prior

    def fit(self, data):
        # Define the objective (negative log-posterior) and an optimizer over
        # the free parameters
        params = [self.alpha, self.lambda_val, self.beta, self.stay_bias, self.weight]
        optimizer = optim.Adam(params, lr=0.01)
        for epoch in range(self.num_epochs):
            # Clear gradients
            optimizer.zero_grad()
            # Compute log-posterior and loss
            log_posterior = self.log_posterior(data)
            loss = -log_posterior
            # Backpropagation
            loss.backward()
            optimizer.step()
            # TODO Print progress or monitor convergence criteria
        # Return the estimated parameter values
        return self.get_parameters()

    def get_parameters(self):
        # Current parameter estimates as plain floats
        return {
            "alpha": self.alpha.item(),
            "lambda": self.lambda_val.item(),
            "beta": self.beta.item(),
            "stay_bias": self.stay_bias.item(),
            "weight": self.weight.item(),
        }


def fit_all_users():
    # Example usage
    data = pd.read_csv("Gtaskmb_E2_clean.csv")  # Replace with the path to your dataset
    num_first_stage_choices = 2
    num_second_stage_choices = 2
    num_epochs = 100
    # Create an instance of the learning model (num_epochs must be passed by
    # keyword; positionally it would land in the num_trials slot)
    model = LearningModel(num_first_stage_choices, num_second_stage_choices, num_epochs=num_epochs)
    # TODO Run this estimation for each user in your dataset and save the estimated parameters.
    # Note: hybrid_algorithm expects data[trial][step] tuples, so the raw
    # DataFrame must first be reshaped into that nested structure.
    estimated_parameters = model.fit(data)
    return estimated_parameters


if __name__ == "__main__":
    fit_all_users()
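
For reference, a minimal sketch of how the two TODO stubs might be filled in, assuming the nested data layout shown above. The function names, the per-trial scoring, and the Normal priors are illustrative assumptions, not the method from Daw et al. (2011); a faithful likelihood would accumulate each trial's choice probability inside hybrid_algorithm as the values evolve, rather than scoring all choices with the final probabilities.

    def sketch_log_likelihood(model: LearningModel, data) -> torch.Tensor:
        # Hypothetical sketch: score each observed first-stage choice under the
        # softmax probabilities returned by the hybrid algorithm. This uses the
        # probabilities after all trials; a proper likelihood would score each
        # trial with the value estimates available at the time of the choice.
        first_stage_action_probs, _ = model.hybrid_algorithm(data)
        log_lik = torch.tensor(0.0, device=model.device)
        for trial in range(model.num_trials):
            choice = int(data[trial][0][0])
            log_lik = log_lik + torch.log(first_stage_action_probs[choice])
        return log_lik


    def sketch_log_prior(model: LearningModel) -> torch.Tensor:
        # Hypothetical sketch: independent Normal(0.5, 1) priors on every
        # parameter (illustrative choices, not the priors from the paper)
        prior = torch.distributions.Normal(torch.tensor(0.5, device=model.device),
                                           torch.tensor(1.0, device=model.device))
        params = [model.alpha, model.lambda_val, model.beta, model.stay_bias, model.weight]
        return sum(prior.log_prob(p) for p in params)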