# danyaljj/slope-computation.py

## slope-computation.py
import statistics as st

import scipy.stats
import numpy as np

def metric1(scores, row_aggregator, column_aggregator, cell_aggregator):
    """Aggregate how each row's off-diagonal cells compare to its diagonal cell.

    For every row ``r`` the diagonal entry ``scores[r][r]`` is paired with each
    other entry of that row through ``cell_aggregator(diagonal, value, distance)``,
    where ``distance`` is the absolute column offset from the diagonal. The
    per-cell results of a row are reduced with ``column_aggregator`` and the
    per-row results are reduced with ``row_aggregator``.

    Args:
        scores: Sequence of rows; row ``r`` must have an element at index ``r``.
        row_aggregator: Callable taking the list of per-row values.
        column_aggregator: Callable taking the list of per-cell values of a row.
        cell_aggregator: Callable ``(diagonal, value, distance)`` -> number.

    Returns:
        Whatever ``row_aggregator`` returns for the list of per-row values.
    """
    def summarize_row(position, row):
        # The diagonal cell is the reference every other cell is compared to.
        anchor = row[position]
        cell_scores = []
        for column, value in enumerate(row):
            if column == position:
                continue
            cell_scores.append(cell_aggregator(anchor, value, abs(column - position)))
        return column_aggregator(cell_scores)

    per_row = [summarize_row(position, row) for position, row in enumerate(scores)]
    return row_aggregator(per_row)


# Aliases for ready-made reducers; presumably meant to be passed as the
# row/column aggregators of metric1 (no call is visible in this file).
mean_aggregator = st.mean
max_aggregator = max
# Disabled alternative to cell_aggregator: it uses the absolute difference
# between the diagonal and the cell instead of clipping negatives to zero.
# def cell_aggregator(diag, y, dist_years):
#     return pow(abs(diag - y), dist_years / 5)

def cell_aggregator(diag, y, dist_years, years_scale=5):
    """Score how far a cell falls below the diagonal cell of its row.

    The shortfall ``diag - y`` is clipped at zero, so a cell that matches or
    exceeds the diagonal contributes nothing, and the clipped value is raised
    to the power ``dist_years / years_scale``.

    Args:
        diag: Value of the diagonal cell.
        y: Value of the cell being compared with ``diag``.
        dist_years: Distance of the cell from the diagonal.
        years_scale: Divisor applied to ``dist_years`` to form the exponent.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        ``max(diag - y, 0) ** (dist_years / years_scale)``.
    """
    return pow(max(diag - y, 0), dist_years / years_scale)


# Small hand-made score matrices. None of them is referenced by the other
# code visible in this file.

# 2x2: zeros on the diagonal, ones off it.
scores0 = [
    [0, 1],
    [1, 0],
]

# 2x2: ones on the diagonal, zeros off it.
scores1 = [
    [1, 0],
    [0, 1],
]

# 2x2: every cell is 1.
scores2 = [
    [1, 1],
    [1, 1]
]

# 2x2: diagonal 1, off-diagonal 0.5.
scores3 = [
    [1, 0.5],
    [0.5, 1],
]

# 2x2: every cell is 0.5.
scores4 = [
    [0.5, 0.5],
    [0.5, 0.5],
]

# Non-square: 2 rows by 3 columns.
scores5 = [
    [1, 0.5, 0.5],
    [0.5, 1, 0.5],
]

# 3x3: diagonal 1, off-diagonal 0.5.
scores6 = [
    [1, 0.5, 0.5],
    [0.5, 1, 0.5],
    [0.5, 0.5, 1],
]

# 6x6 table of percentages, rescaled to fractions by dividing by 100.
scores7 = [
    [x/100.0 for x in [91.3, 76.8, 65.5, 56.3, 56.7, 48.4]],
    [x/100.0 for x in [81.3, 83.4, 71.6, 62.2, 56.6, 49.1]],
    [x/100.0 for x in [68.2, 74.8, 83.9, 72.9, 63.8, 56.2]],
    [x/100.0 for x in [60.6, 65.8, 77.1, 79.2, 69.5, 64.3]],
    [x/100.0 for x in [51.9, 58.4, 68.6, 72.6, 80.2, 71.8]],
    [x/100.0 for x in [45.8, 53.1, 65.1, 69.6, 76.1, 78.0]]
]

# The top-left 5x5 corner of scores7 (same numbers, last row/column dropped).
scores8 = [
    [x/100.0 for x in [91.3, 76.8, 65.5, 56.3, 56.7]],
    [x/100.0 for x in [81.3, 83.4, 71.6, 62.2, 56.6]],
    [x/100.0 for x in [68.2, 74.8, 83.9, 72.9, 63.8]],
    [x/100.0 for x in [60.6, 65.8, 77.1, 79.2, 69.5]],
    [x/100.0 for x in [51.9, 58.4, 68.6, 72.6, 80.2]],
]

# Raw score tables kept as whitespace-separated text. The trailing backslash
# on each line is a line continuation inside the string literal, so every
# table is a single-line string; they are parsed into square arrays below.

# 36 values (parsed as 6x6).
twitter_politics = "91    77    65    56    57    48 \
81    83    72    62    57    49 \
68    75    84    73    64    56 \
61    66    77    79    69    64 \
52    58    69    73    80    72 \
46    53    65    70    76    78"


# 36 values (parsed as 6x6).
twitter_ner = "76   77  76  72  69  69    \
72  74  77  72  69  68    \
72  74  78  71  69  69    \
74  77  79  76  73  71    \
72  76  79  71  74  73    \
71  72  77  71  72  73"

# 16 values (parsed as 4x4).
science_scierc = "68    61    60    57    \
64    70    66    67    \
65    69    76    69    \
60    62    65    73"


# 16 values (parsed as 4x4).
science_ai = "86    79  71  66    \
83  86  74  63     \
82  85  83  84     \
72  79  78  85    "

# 16 values (parsed as 4x4).
news_src = "94  52  59  52    \
60  92  77  75    \
78  81  91  84    \
71  79  82  88    "

# 16 values (parsed as 4x4).
news_mfc = "27  25  25  26    \
24  28  24  27    \
22  24  26  26    \
24  26  25  33"

# 16 values (parsed as 4x4).
news_sum_rL = "36   39  33  29    \
31  43  35  26    \
29  39  36  27    \
28  32  31  32"

# 16 values (parsed as 4x4).
# NOTE(review): these numbers are identical to news_mfc above -- confirm this
# is not a copy-paste slip.
news_sum_r1 = "27   25  25  26    \
24  28  24  27    \
22  24  26  26    \
24  26  25  33"


# Separator given to np.fromstring for the whitespace-delimited tables.
sep = "   "

# Each text table is replaced, under the same name, by its parsed square array.
# The two twitter tables hold 36 values (6x6) ...
twitter_politics = np.reshape(np.fromstring(twitter_politics, sep=sep), (6, 6))
twitter_ner = np.reshape(np.fromstring(twitter_ner, sep=sep), (6, 6))

# ... and the science/news tables hold 16 values (4x4).
science_scierc = np.reshape(np.fromstring(science_scierc, sep=sep), (4, 4))
science_ai = np.reshape(np.fromstring(science_ai, sep=sep), (4, 4))

news_mfc = np.reshape(np.fromstring(news_mfc, sep=sep), (4, 4))
news_src = np.reshape(np.fromstring(news_src, sep=sep), (4, 4))
news_sum_rL = np.reshape(np.fromstring(news_sum_rL, sep=sep), (4, 4))
news_sum_r1 = np.reshape(np.fromstring(news_sum_r1, sep=sep), (4, 4))

# Parallel lists: lst[k] is the matrix whose label is names[k].
lst = [
    twitter_politics,
    twitter_ner,
    science_scierc,
    science_ai,
    news_mfc,
    news_src,
    news_sum_r1,
    news_sum_rL,
]
names = [
    "twitter_politics",
    "twitter_ner",
    "science_scierc",
    "science_ai",
    "news_mfc",
    "news_src",
    "news_sum_r1",
    "news_sum_rL",
]


# manual computation of slope
def slope(x, y):
    """Return the least-squares slope of ``y`` against ``x``.

    Computed by hand as ``sum((y - mean(y)) * (x - mean(x)))`` divided by
    ``sum((x - mean(x)) ** 2)``. The means are taken over the full inputs,
    while the sums run over the pairs produced by ``zip(x, y)``.

    Args:
        x: Sequence of x coordinates.
        y: Sequence of y coordinates.

    Returns:
        The slope as a NumPy float.
    """
    x_bar = np.mean(x)
    y_bar = np.mean(y)
    # Centre every pair once, then reuse the deviations for both sums.
    deviations = [(xi - x_bar, yi - y_bar) for xi, yi in zip(x, y)]
    numerator = np.sum([dy * dx for dx, dy in deviations])
    denominator = np.sum([dx * dx for dx, _ in deviations])
    return numerator / denominator

def metric4(scores):
    """Return the mean slope leading up to, and away from, each diagonal cell.

    Every column ``i`` of the square matrix is split at its diagonal entry:
    the "before" segment is rows ``0..i`` and the "after" segment is rows
    ``i..n-1``, so the diagonal cell takes part in both. A least-squares line
    is fitted to every segment that has at least two points (x is the position
    inside the segment). The slopes are averaged per side and the absolute
    values of the two averages are returned.

    Args:
        scores: Square 2-D array-like of numbers.

    Returns:
        Tuple ``(before, after)`` of absolute mean slopes. Both entries are
        ``nan`` when ``scores`` is not a square 2-D matrix, and an entry is
        ``nan`` when no segment on that side has two points.

    Raises:
        AssertionError: If scipy's slope and the manual ``slope`` helper
            differ by 0.01 or more.
    """
    scores = np.array(scores)

    # Always hand back a pair so that ``before, after = metric4(...)`` keeps
    # working; a bare ``np.nan`` cannot be unpacked by the caller.
    if scores.ndim != 2 or scores.shape[0] != scores.shape[1]:
        return (np.nan, np.nan)

    def checked_slope(segment):
        # Fit with scipy and cross-check against the hand-written ``slope``.
        # The signed values are compared so a sign flip is caught as well.
        positions = list(range(len(segment)))
        fitted = scipy.stats.linregress(positions, segment).slope
        manual = slope(positions, segment)
        assert abs(manual - fitted) < 0.01, f"the two slope computations don't match: {fitted} vs {manual}"
        return fitted

    befores = []
    afters = []
    for i in range(scores.shape[0]):
        col = scores[:, i]
        # Slice the column instead of zero-padding and filtering by magnitude,
        # so genuine scores at or near zero are not mistaken for empty cells.
        before_segment = col[: i + 1]
        after_segment = col[i:]
        if len(before_segment) > 1:
            befores.append(checked_slope(before_segment))
        if len(after_segment) > 1:
            afters.append(checked_slope(after_segment))

    # A 1x1 matrix yields no usable segment; report nan rather than taking the
    # mean of an empty list.
    before_mean = np.mean(befores) if befores else np.nan
    after_mean = np.mean(afters) if afters else np.nan
    return (abs(before_mean), abs(after_mean))


# Print the before/after slopes from metric4, and their average, for every
# parsed table; i is the matrix and j its label.
for i,j in zip(lst, names):
    before_slope, after_slope = metric4(i)
    print(f" * name: {j}\n * before_slope: {before_slope}\n * after_slope: {after_slope}\n * avg slope: {(after_slope + before_slope)/2} \n -----")
	import statistics as st

	import scipy.stats
	import numpy as np

	def metric1(scores, row_aggregator, column_aggregator, cell_aggregator):
	    """Aggregate how each row's off-diagonal cells compare to its diagonal cell.

	    For every row ``r`` the diagonal entry ``scores[r][r]`` is paired with each
	    other entry of that row through ``cell_aggregator(diagonal, value, distance)``,
	    where ``distance`` is the absolute column offset from the diagonal. The
	    per-cell results of a row are reduced with ``column_aggregator`` and the
	    per-row results are reduced with ``row_aggregator``.

	    Args:
	        scores: Sequence of rows; row ``r`` must have an element at index ``r``.
	        row_aggregator: Callable taking the list of per-row values.
	        column_aggregator: Callable taking the list of per-cell values of a row.
	        cell_aggregator: Callable ``(diagonal, value, distance)`` -> number.

	    Returns:
	        Whatever ``row_aggregator`` returns for the list of per-row values.
	    """
	    row_values = []
	    for row_idx, row1 in enumerate(scores):
	        diagonal_x = row1[row_idx]
	        # Compare every non-diagonal cell of the row with the diagonal cell.
	        cell_values = [
	            cell_aggregator(diagonal_x, x, abs(col_idx - row_idx))
	            for col_idx, x in enumerate(row1)
	            if col_idx != row_idx
	        ]
	        row_values.append(column_aggregator(cell_values))
	    return row_aggregator(row_values)





	# NOTE(review): this tab-indented part of the file repeats definitions that
	# also appear earlier in the file, with the nesting of function bodies lost.
	# Confirm whether the duplicate is meant to be here at all.
	# Aliases for ready-made reducers; presumably meant to be passed as the
	# row/column aggregators of metric1 (no call is visible in this file).
	mean_aggregator = st.mean
	max_aggregator = max
	# Disabled alternative to cell_aggregator: it uses the absolute difference
	# between the diagonal and the cell instead of clipping negatives to zero.
	# def cell_aggregator(diag, y, dist_years):
	# return pow(abs(diag - y), dist_years / 5)

	def cell_aggregator(diag, y, dist_years, years_scale=5):
	    """Score how far a cell falls below the diagonal cell of its row.

	    The shortfall ``diag - y`` is clipped at zero, so a cell that matches or
	    exceeds the diagonal contributes nothing, and the clipped value is raised
	    to the power ``dist_years / years_scale``.

	    Args:
	        diag: Value of the diagonal cell.
	        y: Value of the cell being compared with ``diag``.
	        dist_years: Distance of the cell from the diagonal.
	        years_scale: Divisor applied to ``dist_years`` to form the exponent.
	            Defaults to 5, the value that used to be hard-coded.

	    Returns:
	        ``max(diag - y, 0) ** (dist_years / years_scale)``.
	    """
	    return pow(max(diag - y, 0), dist_years / years_scale)


	# Small hand-made score matrices. None of them is referenced by the other
	# code visible in this file.

	# 2x2: zeros on the diagonal, ones off it.
	scores0 = [
	[0, 1],
	[1, 0],
	]

	# 2x2: ones on the diagonal, zeros off it.
	scores1 = [
	[1, 0],
	[0, 1],
	]

	# 2x2: every cell is 1.
	scores2 = [
	[1, 1],
	[1, 1]
	]

	# 2x2: diagonal 1, off-diagonal 0.5.
	scores3 = [
	[1, 0.5],
	[0.5, 1],
	]

	# 2x2: every cell is 0.5.
	scores4 = [
	[0.5, 0.5],
	[0.5, 0.5],
	]

	# Non-square: 2 rows by 3 columns.
	scores5 = [
	[1, 0.5, 0.5],
	[0.5, 1, 0.5],
	]

	# 3x3: diagonal 1, off-diagonal 0.5.
	scores6 = [
	[1, 0.5, 0.5],
	[0.5, 1, 0.5],
	[0.5, 0.5, 1],
	]

	# 6x6 table of percentages, rescaled to fractions by dividing by 100.
	scores7 = [
	[x/100.0 for x in [91.3, 76.8, 65.5, 56.3, 56.7, 48.4]],
	[x/100.0 for x in [81.3, 83.4, 71.6, 62.2, 56.6, 49.1]],
	[x/100.0 for x in [68.2, 74.8, 83.9, 72.9, 63.8, 56.2]],
	[x/100.0 for x in [60.6, 65.8, 77.1, 79.2, 69.5, 64.3]],
	[x/100.0 for x in [51.9, 58.4, 68.6, 72.6, 80.2, 71.8]],
	[x/100.0 for x in [45.8, 53.1, 65.1, 69.6, 76.1, 78.0]]
	]

	# The top-left 5x5 corner of scores7 (same numbers, last row/column dropped).
	scores8 = [
	[x/100.0 for x in [91.3, 76.8, 65.5, 56.3, 56.7]],
	[x/100.0 for x in [81.3, 83.4, 71.6, 62.2, 56.6]],
	[x/100.0 for x in [68.2, 74.8, 83.9, 72.9, 63.8]],
	[x/100.0 for x in [60.6, 65.8, 77.1, 79.2, 69.5]],
	[x/100.0 for x in [51.9, 58.4, 68.6, 72.6, 80.2]],
	]

	# Raw score tables kept as whitespace-separated text. The trailing backslash
	# on each line is a line continuation inside the string literal, so every
	# table is a single-line string; the leading tab of each continued line is
	# part of the string. They are parsed into square arrays below.

	# 36 values (parsed as 6x6).
	twitter_politics = "91 77 65 56 57 48 \
	81 83 72 62 57 49 \
	68 75 84 73 64 56 \
	61 66 77 79 69 64 \
	52 58 69 73 80 72 \
	46 53 65 70 76 78"


	# 36 values (parsed as 6x6).
	twitter_ner = "76 77 76 72 69 69 \
	72 74 77 72 69 68 \
	72 74 78 71 69 69 \
	74 77 79 76 73 71 \
	72 76 79 71 74 73 \
	71 72 77 71 72 73"

	# 16 values (parsed as 4x4).
	science_scierc = "68 61 60 57 \
	64 70 66 67 \
	65 69 76 69 \
	60 62 65 73"


	# 16 values (parsed as 4x4).
	science_ai = "86 79 71 66 \
	83 86 74 63 \
	82 85 83 84 \
	72 79 78 85 "

	# 16 values (parsed as 4x4).
	news_src = "94 52 59 52 \
	60 92 77 75 \
	78 81 91 84 \
	71 79 82 88 "

	# 16 values (parsed as 4x4).
	news_mfc = "27 25 25 26 \
	24 28 24 27 \
	22 24 26 26 \
	24 26 25 33"

	# 16 values (parsed as 4x4).
	news_sum_rL = "36 39 33 29 \
	31 43 35 26 \
	29 39 36 27 \
	28 32 31 32"

	# 16 values (parsed as 4x4).
	# NOTE(review): these numbers are identical to news_mfc above -- confirm this
	# is not a copy-paste slip.
	news_sum_r1 = "27 25 25 26 \
	24 28 24 27 \
	22 24 26 26 \
	24 26 25 33"



	# Separator given to np.fromstring for the whitespace-delimited tables.
	sep = " "

	# Each text table is replaced, under the same name, by its parsed square array.
	# The two twitter tables hold 36 values (6x6) ...
	twitter_politics = np.reshape(np.fromstring(twitter_politics, sep=sep), (6, 6))
	twitter_ner = np.reshape(np.fromstring(twitter_ner, sep=sep), (6, 6))

	# ... and the science/news tables hold 16 values (4x4).
	science_scierc = np.reshape(np.fromstring(science_scierc, sep=sep), (4, 4))
	science_ai = np.reshape(np.fromstring(science_ai, sep=sep), (4, 4))

	news_mfc = np.reshape(np.fromstring(news_mfc, sep=sep), (4, 4))
	news_src = np.reshape(np.fromstring(news_src, sep=sep), (4, 4))
	news_sum_rL = np.reshape(np.fromstring(news_sum_rL, sep=sep), (4, 4))
	news_sum_r1 = np.reshape(np.fromstring(news_sum_r1, sep=sep), (4, 4))

	# Parallel lists: lst[k] is the matrix whose label is names[k].
	lst = [
	    twitter_politics,
	    twitter_ner,
	    science_scierc,
	    science_ai,
	    news_mfc,
	    news_src,
	    news_sum_r1,
	    news_sum_rL,
	]
	names = [
	    "twitter_politics",
	    "twitter_ner",
	    "science_scierc",
	    "science_ai",
	    "news_mfc",
	    "news_src",
	    "news_sum_r1",
	    "news_sum_rL",
	]


	# manual computation of slope
	def slope(x, y):
	    """Return the least-squares slope of ``y`` against ``x``.

	    Computed by hand as ``sum((y - mean(y)) * (x - mean(x)))`` divided by
	    ``sum((x - mean(x)) ** 2)``. The means are taken over the full inputs,
	    while the sums run over the pairs produced by ``zip(x, y)``.

	    Args:
	        x: Sequence of x coordinates.
	        y: Sequence of y coordinates.

	    Returns:
	        The slope as a NumPy float.
	    """
	    ymean = np.mean(y)
	    xmean = np.mean(x)
	    num = []
	    den = []
	    for xx, yy in zip(x, y):
	        num.append((yy - ymean) * (xx - xmean))
	        den.append((xx - xmean) * (xx - xmean))

	    return np.sum(num) / np.sum(den)

	def metric4(scores):
	    """Return the mean slope leading up to, and away from, each diagonal cell.

	    Every column ``i`` of the square matrix is split at its diagonal entry:
	    the "before" segment is rows ``0..i`` and the "after" segment is rows
	    ``i..n-1``, so the diagonal cell takes part in both. A least-squares line
	    is fitted to every segment that has at least two points (x is the position
	    inside the segment). The slopes are averaged per side and the absolute
	    values of the two averages are returned.

	    Args:
	        scores: Square 2-D array-like of numbers.

	    Returns:
	        Tuple ``(before, after)`` of absolute mean slopes. Both entries are
	        ``nan`` when ``scores`` is not a square 2-D matrix, and an entry is
	        ``nan`` when no segment on that side has two points.

	    Raises:
	        AssertionError: If scipy's slope and the manual ``slope`` helper
	            differ by 0.01 or more.
	    """
	    scores = np.array(scores)

	    # Always hand back a pair so that ``before, after = metric4(...)`` keeps
	    # working; a bare ``np.nan`` cannot be unpacked by the caller.
	    if scores.ndim != 2 or scores.shape[0] != scores.shape[1]:
	        return (np.nan, np.nan)

	    def checked_slope(segment):
	        # Fit with scipy and cross-check against the hand-written ``slope``.
	        # The signed values are compared so a sign flip is caught as well.
	        positions = list(range(len(segment)))
	        fitted = scipy.stats.linregress(positions, segment).slope
	        manual = slope(positions, segment)
	        assert abs(manual - fitted) < 0.01, f"the two slope computations don't match: {fitted} vs {manual}"
	        return fitted

	    befores = []
	    afters = []
	    for i in range(scores.shape[0]):
	        col = scores[:, i]
	        # Slice the column instead of zero-padding and filtering by magnitude,
	        # so genuine scores at or near zero are not mistaken for empty cells.
	        before_segment = col[: i + 1]
	        after_segment = col[i:]
	        if len(before_segment) > 1:
	            befores.append(checked_slope(before_segment))
	        if len(after_segment) > 1:
	            afters.append(checked_slope(after_segment))

	    # A 1x1 matrix yields no usable segment; report nan rather than taking the
	    # mean of an empty list.
	    before_mean = np.mean(befores) if befores else np.nan
	    after_mean = np.mean(afters) if afters else np.nan
	    return (abs(before_mean), abs(after_mean))


	# Print the before/after slopes from metric4, and their average, for every
	# parsed table; i is the matrix and j its label.
	for i,j in zip(lst, names):
	    before_slope, after_slope = metric4(i)
	    print(f" * name: {j}\n * before_slope: {before_slope}\n * after_slope: {after_slope}\n * avg slope: {(after_slope + before_slope)/2} \n -----")