Skip to content

Instantly share code, notes, and snippets.

@clementpoiret
Last active January 20, 2023 13:35
Show Gist options
  • Save clementpoiret/bf53bc39c671e6d025d30d69a1e1b2a6 to your computer and use it in GitHub Desktop.
Save clementpoiret/bf53bc39c671e6d025d30d69a1e1b2a6 to your computer and use it in GitHub Desktop.
Comparing OpenAI's Codex & ChatGPT to GitHub's Copilot
import time
import pandas as pd
import numpy as np
import pingouin as pg
from scipy.stats import pearsonr
def mse(y, y_hat):
    """Return the mean squared error between ``y`` and ``y_hat``.

    The difference is taken element-wise and the squared differences are
    averaged over every element, so the result is a single scalar.
    """
    residual = y_hat - y
    return np.mean(np.square(residual))
def partial_corr_chatgpt(C: np.ndarray) -> np.ndarray:
    """ChatGPT's proposal for turning a correlation matrix into partial correlations.

    What the code actually computes: for every pair ``i < j`` the entry
    ``-C[i, j] / sqrt(C[i, i] * C[j, j])`` is written to both ``[i, j]`` and
    ``[j, i]``; the diagonal is left at 0. Only the upper triangle and the
    diagonal of ``C`` are read.

    NOTE(review): no matrix inversion takes place, so this is just the
    negated, diagonal-normalised input rather than a partial correlation.
    It is kept as-is because the script compares it against a reference.

    Args:
        C: Square 2-D array (a correlation matrix in the script below).

    Returns:
        A ``(p, p)`` float array, where ``p = C.shape[0]``.
    """
    size: int = C.shape[0]
    out: np.ndarray = np.zeros((size, size))

    # Index pairs (i, j) with i < j, i.e. the strict upper triangle.
    upper_i, upper_j = np.triu_indices(size, k=1)
    normaliser = np.sqrt(C[upper_i, upper_i] * C[upper_j, upper_j])
    values = -C[upper_i, upper_j] / normaliser

    # Mirror the upper-triangle values into the lower triangle.
    out[upper_i, upper_j] = values
    out[upper_j, upper_i] = values
    return out
def partial_corr_copilot(C: np.ndarray) -> np.ndarray:
    """Copilot's proposal for turning a correlation matrix into partial correlations.

    What the code actually computes: with ``d = C.diagonal()``, it builds
    ``diag(sqrt(1 / d))``, inverts that diagonal matrix (mathematically
    ``diag(sqrt(d))``) and returns ``S @ C @ S`` for that inverse ``S``.

    NOTE(review): ``C`` itself is never inverted, so for an input with a unit
    diagonal the result is ``C`` unchanged. It is kept as-is because the
    script compares it against a reference.

    Args:
        C: Square 2-D array (a correlation matrix in the script below).

    Returns:
        Array of the same shape as ``C``.
    """
    inv_sqrt_diag: np.ndarray = np.diag(np.sqrt(1 / C.diagonal()))
    scale: np.ndarray = np.linalg.inv(inv_sqrt_diag)
    return scale @ C @ scale
def partial_corr_codex(C: np.ndarray) -> np.ndarray:
    """Codex's proposal for turning a correlation matrix into partial correlations.

    For every pair of columns ``(i, j)`` the function regresses column ``i``
    and column ``j`` of ``C`` on all remaining columns (least squares, no
    intercept) and stores the Pearson correlation of the two residual
    vectors. The diagonal is set to 1.

    NOTE(review): the columns of ``C`` are used as if they were data
    vectors; ``C`` is never inverted. The algorithm is kept unchanged because
    the script compares it against a reference.

    Args:
        C: 2-D array (a correlation matrix in the script below).

    Returns:
        A symmetric ``(p, p)`` float array, where ``p = C.shape[1]``.
    """
    p: int = C.shape[1]
    # Builtin ``float``/``bool`` replace the ``np.float``/``np.bool`` aliases,
    # which no longer exist in current NumPy releases.
    P_corr: np.ndarray = np.zeros((p, p), dtype=float)
    np.fill_diagonal(P_corr, 1.0)
    for i in range(p):
        for j in range(i + 1, p):
            # Boolean mask selecting every column except i and j.
            keep: np.ndarray = np.ones(p, dtype=bool)
            keep[[i, j]] = False
            regressors: np.ndarray = C[:, keep]
            # ``rcond=None`` selects the machine-precision cutoff explicitly
            # and avoids the FutureWarning older NumPy versions emit.
            beta_i: np.ndarray = np.linalg.lstsq(
                regressors, C[:, i], rcond=None)[0]
            beta_j: np.ndarray = np.linalg.lstsq(
                regressors, C[:, j], rcond=None)[0]
            res_i: np.ndarray = C[:, i] - regressors.dot(beta_i)
            res_j: np.ndarray = C[:, j] - regressors.dot(beta_j)
            corr = pearsonr(res_i, res_j)[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr
    return P_corr
def partial_corr(corr: np.ndarray) -> np.ndarray:
    """Convert a correlation matrix into a partial correlation matrix.

    The input is inverted, each entry ``[i, j]`` of the inverse is divided by
    ``sqrt(inv[i, i] * inv[j, j])`` and negated, and the diagonal is then set
    to exactly 1.

    Partly based on the implementation of the easystats' `cor_to_pcor` function.
    `https://github.com/easystats/correlation/blob/6e0e7b4b3e512109500edcda7c05a8d7ff3e663e/R/cor_to_pcor.R`
    Covariance to Correlation matrix taken from nilearn:
    `https://github.com/nilearn/nilearn/blob/9c29e3376/nilearn/connectome/connectivity_matrices.py`

    Args:
        corr: Square, invertible 2-D array.

    Returns:
        Array of the same shape as ``corr``.

    Raises:
        numpy.linalg.LinAlgError: If ``corr`` cannot be inverted.
    """
    precision: np.ndarray = np.linalg.inv(corr)

    # Row vector of 1 / sqrt(diagonal); its transpose scales the rows.
    scale: np.ndarray = (1. / np.sqrt(np.diag(precision))).reshape(1, -1)
    result: np.ndarray = -(precision * scale * scale.T)

    # The scaling above yields -1 on the diagonal; overwrite it with 1.
    np.fill_diagonal(result, 1.)
    return result
if __name__ == "__main__":
    # Load a sample dataset; `df.corr()` is the input handed to every
    # implementation and `df.pcorr()` is the reference they are scored against.
    df = pg.read_dataset("partial_corr")
    corr = df.corr().values
    p_corr = df.pcorr().values

    # Implementations under test, in the order they appear in the report.
    candidates = {
        "ChatGPT": partial_corr_chatgpt,
        "Copilot": partial_corr_copilot,
        "Codex": partial_corr_codex,
        "Me :)": partial_corr,
    }

    # Time each call with the monotonic high-resolution clock. Rounding
    # `time.time()` to whole milliseconds reports 0 for sub-millisecond calls.
    estimates = []
    timings_ms = []
    for func in candidates.values():
        start = time.perf_counter()
        estimate = func(corr)
        timings_ms.append((time.perf_counter() - start) * 1000)
        estimates.append(estimate)

    results = pd.DataFrame({
        "Developer": list(candidates),
        "Execution time (ms)": timings_ms,
        "Is correct": [np.allclose(_p, p_corr) for _p in estimates],
        "MSE": [mse(_p, p_corr) for _p in estimates],
    })
    print(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment