Skip to content

Instantly share code, notes, and snippets.

@yrik
Created May 17, 2023 19:15
Show Gist options
  • Save yrik/fa35868ed43a5ee33dbe56c18a0b08e4 to your computer and use it in GitHub Desktop.
Save yrik/fa35868ed43a5ee33dbe56c18a0b08e4 to your computer and use it in GitHub Desktop.
Visualise possible IELTS scores discrepancy by given correlation coefficient
import numpy as np
from scipy.stats import spearmanr
import time
def generate_correlated_data(target_corr, ref_scores, score_step=0.5, low=3, high=9, max_iterations=1000000, time_limit=5.0):
    """Randomly search for scores whose Spearman correlation with ``ref_scores`` is close to a target.

    Each attempt draws one score per reference score, uniformly at random from
    the grid ``low, low + score_step, ..., high``, and measures its Spearman
    correlation against ``ref_scores``. The attempt whose correlation is
    closest to ``target_corr`` is kept.

    Args:
        target_corr: Desired Spearman correlation coefficient.
        ref_scores: Sequence of reference scores to correlate against.
        score_step: Spacing between candidate scores.
        low: Smallest candidate score.
        high: Largest candidate score (included in the grid).
        max_iterations: Maximum number of additional random samples to try
            after the initial one.
        time_limit: Maximum search time in seconds.

    Returns:
        A tuple ``(best_x, y, best_corr)``: the best sample found, the
        reference scores as a NumPy array, and the Spearman correlation
        between them. ``best_corr`` is NaN only if no sample produced a
        defined correlation (e.g. every sample was constant).
    """
    # Differences this small are floating-point noise, not a real mismatch.
    exact_match_tolerance = 1e-12

    y = np.array(ref_scores)
    # Build the candidate grid once. np.arange is half-open, so extend the stop
    # by half a step to make `high` itself a candidate without overshooting.
    candidates = np.arange(low, high + score_step / 2, score_step)

    def draw_sample():
        """Draw one random sample and return (sample, correlation, distance to target)."""
        sample = np.random.choice(candidates, size=len(ref_scores))
        sample_corr, _ = spearmanr(sample, y)
        distance = abs(sample_corr - target_corr)
        # spearmanr yields NaN for constant input; rank such samples worst so
        # that any sample with a defined correlation can replace them.
        if np.isnan(distance):
            distance = np.inf
        return sample, sample_corr, distance

    best_x, best_corr, best_error = draw_sample()

    # Monotonic clock: the time budget must not be affected by system clock changes.
    start_time = time.monotonic()
    for _ in range(max_iterations):
        if best_error <= exact_match_tolerance:
            break
        if time.monotonic() - start_time > time_limit:
            break
        x, corr, error = draw_sample()
        if error < best_error:
            best_x, best_corr, best_error = x, corr, error

    return best_x, y, best_corr
# Reference scores that the generated sample is correlated against
ref_scores = [3, 3.5, 4, 4.5, 5, 5.5, 6, 6, 7, 7, 7, 8, 8.5]
# Generate data with a target Spearman correlation of 0.5
x, y, corr = generate_correlated_data(0.5, ref_scores, score_step=0.5, low=3, high=9)
# Report the correlation actually achieved, then both score series
print('Generated data correlation:', corr)
print('Reference Scores: ', y)
print('Sample Scores: ', x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment