Last active
January 20, 2023 13:35
-
-
Save clementpoiret/bf53bc39c671e6d025d30d69a1e1b2a6 to your computer and use it in GitHub Desktop.
Comparing OpenAI's Codex & ChatGPT to GitHub's Copilot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import pandas as pd | |
import numpy as np | |
import pingouin as pg | |
from scipy.stats import pearsonr | |
def mse(y, y_hat):
    """Return the mean squared error between ``y`` and ``y_hat``.

    A deliberately simple ("dummy") error term.

    Parameters
    ----------
    y, y_hat : array_like
        Values to compare. Both are converted with ``np.asarray`` so that
        plain Python sequences are accepted in addition to NumPy arrays.

    Returns
    -------
    The mean of the element-wise squared differences, taken over all
    elements.
    """
    # Coerce first: subtracting two plain lists would raise a TypeError.
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)
    return np.mean((y_hat - y) ** 2)
def partial_corr_chatgpt(C: np.ndarray) -> np.ndarray:
    """Converts a correlation matrix to a partial correlation matrix.

    ChatGPT version, kept behaviourally as generated.

    For every pair ``r < c`` the value ``-C[r, c] / sqrt(C[r, r] * C[c, c])``
    is written to both ``[r, c]`` and ``[c, r]`` of the result. Only entries
    of ``C`` itself are used (no matrix inversion is performed), and the
    diagonal of the result stays at zero.
    """
    size: int = C.shape[0]
    result: np.ndarray = np.zeros((size, size))
    # Strict upper triangle, visited in row-major order.
    upper_rows, upper_cols = np.triu_indices(size, k=1)
    for r, c in zip(upper_rows, upper_cols):
        value = -C[r, c] / np.sqrt(C[r, r] * C[c, c])
        result[r, c] = value
        result[c, r] = value  # mirror into the lower triangle
    return result
def partial_corr_copilot(C: np.ndarray) -> np.ndarray:
    """Converts a correlation matrix to a partial correlation matrix.

    Copilot version, kept behaviourally as generated.

    Builds a diagonal matrix from ``sqrt(1 / diag(C))``, inverts it, and
    returns that inverse multiplied onto ``C`` from the left and the right.
    ``C`` itself is never inverted.
    """
    inv_sqrt_diag: np.ndarray = np.sqrt(1 / C.diagonal())
    scaling: np.ndarray = np.linalg.inv(np.diag(inv_sqrt_diag))
    # Same left-to-right product as scaling.dot(C).dot(scaling).
    return scaling @ C @ scaling
def partial_corr_codex(C: np.ndarray) -> np.ndarray:
    """Converts a correlation matrix to a partial correlation matrix.

    Codex version. For every pair of columns ``(i, j)`` of ``C``, both
    columns are regressed (least squares, no intercept term) on the
    remaining columns of ``C``; the Pearson correlation of the two residual
    vectors is stored at ``[i, j]`` and ``[j, i]``. The diagonal is set to 1.

    The algorithm is kept as generated. Only compatibility issues are fixed:
    the ``np.float`` / ``np.bool`` aliases (removed in NumPy 1.24) are
    replaced by the builtins, and ``rcond=None`` is passed explicitly to
    ``np.linalg.lstsq`` so the call behaves the same, without a
    ``FutureWarning``, across NumPy versions.
    """
    p: int = C.shape[1]
    P_corr: np.ndarray = np.zeros((p, p), dtype=float)
    for i in range(p):
        P_corr[i, i] = 1
        for j in range(i + 1, p):
            # Boolean mask selecting every column except i and j.
            idx: np.ndarray = np.ones(p, dtype=bool)
            idx[i] = False
            idx[j] = False
            # Both regressions share the same regressors; slice them once.
            others: np.ndarray = C[:, idx]
            beta_j: np.ndarray = np.linalg.lstsq(others, C[:, j], rcond=None)[0]
            beta_i: np.ndarray = np.linalg.lstsq(others, C[:, i], rcond=None)[0]
            res_j: np.ndarray = C[:, j] - others.dot(beta_j)
            res_i: np.ndarray = C[:, i] - others.dot(beta_i)
            corr = pearsonr(res_i, res_j)[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr
    return P_corr
def partial_corr(corr: np.ndarray) -> np.ndarray:
    """Personal implementation of partial correlation.

    Inverts ``corr``, rescales the inverse by the reciprocal square roots of
    its own diagonal along rows and columns, negates the result, and sets
    the diagonal to exactly 1.

    Partly based on the implementation of the easystats' `cor_to_pcor` function.
    `https://github.com/easystats/correlation/blob/6e0e7b4b3e512109500edcda7c05a8d7ff3e663e/R/cor_to_pcor.R`

    Covariance to Correlation matrix taken from nilearn:
    `https://github.com/nilearn/nilearn/blob/9c29e3376/nilearn/connectome/connectivity_matrices.py`
    """
    precision: np.ndarray = np.linalg.inv(corr)
    scale: np.ndarray = 1. / np.sqrt(np.diag(precision))

    # Scale columns first, then rows, then flip the sign.
    scaled: np.ndarray = precision * scale[np.newaxis, :]
    scaled = scaled * scale[:, np.newaxis]
    result: np.ndarray = -scaled

    # Force exact 1. on diagonal
    np.fill_diagonal(result, 1.)

    return result
if __name__ == "__main__":
    # Load a sample dataset and obtain the ground truth from pingouin.
    df = pg.read_dataset("partial_corr")
    corr = df.corr().values
    p_corr = df.pcorr().values

    # Implementations under test, in the order they are reported.
    candidates = {
        "ChatGPT": partial_corr_chatgpt,
        "Copilot": partial_corr_copilot,
        "Codex": partial_corr_codex,
        "Me :)": partial_corr,
    }

    # Time each call with perf_counter: it is monotonic and high-resolution,
    # whereas time.time() rounded to whole milliseconds reports 0 for calls
    # that finish in well under a millisecond.
    estimates = []
    timings_ms = []
    for func in candidates.values():
        start = time.perf_counter()
        estimates.append(func(corr))
        timings_ms.append((time.perf_counter() - start) * 1000)

    results = pd.DataFrame({
        "Developer": list(candidates),
        "Execution time (ms)": timings_ms,
        "Is correct": [np.all(np.isclose(_p, p_corr)) for _p in estimates],
        "MSE": [mse(_p, p_corr) for _p in estimates],
    })

    print(results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment