Skip to content

Instantly share code, notes, and snippets.

@danyaljj
Created September 22, 2021 22:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danyaljj/2a77cf74b0a395beaada21c4768170cd to your computer and use it in GitHub Desktop.
Save danyaljj/2a77cf74b0a395beaada21c4768170cd to your computer and use it in GitHub Desktop.
import statistics as st
import scipy.stats
import numpy as np
def metric1(scores, row_aggregator, column_aggregator, cell_aggregator):
    """Reduce a score matrix by comparing each row's cells to its diagonal.

    For every row ``r`` the diagonal entry ``scores[r][r]`` is compared with
    each other cell of that row through ``cell_aggregator``; the per-cell
    results of a row are reduced with ``column_aggregator`` and the resulting
    per-row values are reduced with ``row_aggregator``.

    Args:
        scores: Sequence of rows. Row ``r`` must have an element at index
            ``r`` (an ``IndexError`` propagates otherwise).
        row_aggregator: Callable taking the list of per-row values.
        column_aggregator: Callable taking the list of per-cell values of
            one row (diagonal cell excluded).
        cell_aggregator: Callable ``(diagonal, cell, distance)`` where
            ``distance`` is ``abs(column_index - row_index)``.

    Returns:
        Whatever ``row_aggregator`` returns for the list of per-row values.
    """
    row_values = []
    for row_idx, row in enumerate(scores):
        diagonal = row[row_idx]
        # The diagonal cell itself is skipped; it is only the reference value.
        off_diagonal = [
            cell_aggregator(diagonal, value, abs(col_idx - row_idx))
            for col_idx, value in enumerate(row)
            if col_idx != row_idx
        ]
        row_values.append(column_aggregator(off_diagonal))
    return row_aggregator(row_values)
# Named reducers: the arithmetic mean from the statistics module and the
# builtin max. Presumably meant to be passed as the row_aggregator /
# column_aggregator arguments of metric1 -- they are not referenced elsewhere
# in the visible part of this file.
mean_aggregator = st.mean
max_aggregator = max
def cell_aggregator(diag, y, dist_years, exponent_scale=5):
    """Score how far a cell falls below its row's diagonal value.

    The shortfall ``diag - y`` is clamped at zero, so a cell that matches or
    exceeds the diagonal contributes 0. The clamped shortfall is then raised
    to the power ``dist_years / exponent_scale``.

    An earlier variant, kept here for reference, used
    ``pow(abs(diag - y), dist_years / 5)`` and therefore also counted cells
    that exceed the diagonal.

    Args:
        diag: Diagonal (reference) value of the row.
        y: Value of the cell being compared.
        dist_years: Distance of the cell from the diagonal.
        exponent_scale: Divisor applied to ``dist_years`` to form the
            exponent. Defaults to 5, the value that was previously fixed.

    Returns:
        ``max(diag - y, 0) ** (dist_years / exponent_scale)``.
    """
    shortfall = max(diag - y, 0)
    return pow(shortfall, dist_years / exponent_scale)
# Small hand-written matrices (rows x columns) usable as inputs to the
# metrics in this file. None of them is referenced by the visible script.
scores0 = [[0, 1], [1, 0]]
scores1 = [[1, 0], [0, 1]]
scores2 = [[1, 1], [1, 1]]
scores3 = [[1, 0.5], [0.5, 1]]
scores4 = [[0.5, 0.5], [0.5, 0.5]]
# Deliberately non-square: 2 rows, 3 columns.
scores5 = [[1, 0.5, 0.5], [0.5, 1, 0.5]]
scores6 = [
    [1, 0.5, 0.5],
    [0.5, 1, 0.5],
    [0.5, 0.5, 1],
]

# 6x6 table given in percent and rescaled to fractions.
# NOTE(review): the rounded values look like the twitter_politics table
# defined further down -- confirm they are meant to be the same data.
scores7 = [
    [percent / 100.0 for percent in percent_row]
    for percent_row in (
        [91.3, 76.8, 65.5, 56.3, 56.7, 48.4],
        [81.3, 83.4, 71.6, 62.2, 56.6, 49.1],
        [68.2, 74.8, 83.9, 72.9, 63.8, 56.2],
        [60.6, 65.8, 77.1, 79.2, 69.5, 64.3],
        [51.9, 58.4, 68.6, 72.6, 80.2, 71.8],
        [45.8, 53.1, 65.1, 69.6, 76.1, 78.0],
    )
]

# Same table with the last row and the last column removed (5x5).
scores8 = [row[:5] for row in scores7[:5]]
# Separator handed to np.fromstring when parsing the tables below.
sep = " "

# Each table is written as whitespace-separated numbers, one matrix row per
# source line, and parsed straight into a square float array.
twitter_politics = np.fromstring(
    "91 77 65 56 57 48 "
    "81 83 72 62 57 49 "
    "68 75 84 73 64 56 "
    "61 66 77 79 69 64 "
    "52 58 69 73 80 72 "
    "46 53 65 70 76 78",
    sep=sep,
).reshape(6, 6)

twitter_ner = np.fromstring(
    "76 77 76 72 69 69 "
    "72 74 77 72 69 68 "
    "72 74 78 71 69 69 "
    "74 77 79 76 73 71 "
    "72 76 79 71 74 73 "
    "71 72 77 71 72 73",
    sep=sep,
).reshape(6, 6)

science_scierc = np.fromstring(
    "68 61 60 57 "
    "64 70 66 67 "
    "65 69 76 69 "
    "60 62 65 73",
    sep=sep,
).reshape(4, 4)

science_ai = np.fromstring(
    "86 79 71 66 "
    "83 86 74 63 "
    "82 85 83 84 "
    "72 79 78 85 ",
    sep=sep,
).reshape(4, 4)

news_src = np.fromstring(
    "94 52 59 52 "
    "60 92 77 75 "
    "78 81 91 84 "
    "71 79 82 88 ",
    sep=sep,
).reshape(4, 4)

news_mfc = np.fromstring(
    "27 25 25 26 "
    "24 28 24 27 "
    "22 24 26 26 "
    "24 26 25 33",
    sep=sep,
).reshape(4, 4)

news_sum_rL = np.fromstring(
    "36 39 33 29 "
    "31 43 35 26 "
    "29 39 36 27 "
    "28 32 31 32",
    sep=sep,
).reshape(4, 4)

# NOTE(review): these numbers are identical to news_mfc above -- possibly a
# copy-paste leftover; confirm against the intended source table.
news_sum_r1 = np.fromstring(
    "27 25 25 26 "
    "24 28 24 27 "
    "22 24 26 26 "
    "24 26 25 33",
    sep=sep,
).reshape(4, 4)

# Parallel lists: names[k] is the label printed for lst[k].
lst = [
    twitter_politics,
    twitter_ner,
    science_scierc,
    science_ai,
    news_mfc,
    news_src,
    news_sum_r1,
    news_sum_rL,
]
names = [
    "twitter_politics",
    "twitter_ner",
    "science_scierc",
    "science_ai",
    "news_mfc",
    "news_src",
    "news_sum_r1",
    "news_sum_rL",
]
# manual computation of slope
def slope(x, y):
    """Return the least-squares slope of ``y`` against ``x``.

    Computed as ``sum((y - mean(y)) * (x - mean(x))) / sum((x - mean(x))**2)``.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates, same length as ``x``.

    Returns:
        The slope as a numpy float. The result is ``nan`` when the
        denominator is zero (e.g. all ``x`` equal), as numpy division does.

    Raises:
        ValueError: If ``x`` and ``y`` differ in shape. Pairing them anyway
            would mix means taken over the full inputs with a truncated set
            of points and yield a meaningless slope.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )
    x_centered = x - np.mean(x)
    y_centered = y - np.mean(y)
    return np.sum(y_centered * x_centered) / np.sum(x_centered * x_centered)
def metric4(scores):
    """Mean slope of each column before and after its diagonal entry.

    For a square matrix, column ``i`` is split into rows ``0..i`` ("before")
    and rows ``i..end`` ("after"); the diagonal cell belongs to both
    segments. A least-squares line is fitted to every segment that has at
    least two points, using ``0, 1, 2, ...`` as x positions.

    Args:
        scores: Two-dimensional array-like of numbers.

    Returns:
        A tuple ``(before, after)`` holding the absolute value of the mean
        fitted slope over all "before" segments and over all "after"
        segments. Both entries are ``nan`` when the matrix is not square;
        an entry is ``nan`` when no segment of that kind has two points.

    Raises:
        AssertionError: If the slope from ``scipy.stats.linregress`` and the
            module-level ``slope`` helper differ by 0.01 or more.
    """
    scores = np.array(scores)
    n_rows, n_cols = scores.shape
    if n_rows != n_cols:
        # Always a 2-tuple, so callers that unpack the result keep working.
        return (np.nan, np.nan)

    def fitted_slope(values):
        # Fit against evenly spaced positions and cross-check the library
        # result with the hand-written formula, sign included.
        positions = list(range(len(values)))
        library_slope = scipy.stats.linregress(positions, values).slope
        manual_slope = slope(positions, values)
        assert abs(manual_slope - library_slope) < 0.01, (
            f"the two slope computations don't match: {library_slope} vs {manual_slope}"
        )
        return library_slope

    befores = []
    afters = []
    for i in range(n_cols):
        col = scores[:, i]
        # Slice the column directly rather than zero-padding and filtering
        # out small values: a genuine score at or near zero must stay in the
        # fit and keep its position.
        before_segment = col[: i + 1]
        after_segment = col[i:]
        if len(before_segment) > 1:
            befores.append(fitted_slope(before_segment))
        if len(after_segment) > 1:
            afters.append(fitted_slope(after_segment))

    # Note: absolute value of the mean, not mean of absolute values.
    before_mean = np.mean(befores) if befores else np.nan
    after_mean = np.mean(afters) if afters else np.nan
    return (abs(before_mean), abs(after_mean))
# Print the before/after slopes and their average for every named table.
for matrix, table_name in zip(lst, names):
    before_slope, after_slope = metric4(matrix)
    avg_slope = (after_slope + before_slope) / 2
    print(
        f" * name: {table_name}\n"
        f" * before_slope: {before_slope}\n"
        f" * after_slope: {after_slope}\n"
        f" * avg slope: {avg_slope} \n"
        " -----"
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment