arush15june/data.csv

## data.csv

          
            methods
            heard
            knows
            college

            
              eavesdropping
              40
              3
              1

            
              smshijacking
              29
              2
              1

            
              locationtracking
              43
              5
              1

            
              dosattack
              19
              1
              1

            
              crackingenc
              11
              0
              1

            
              eavesdropping
              41
              6
              2

            
              smshijacking
              34
              3
              2

            
              locationtracking
              40
              2
              2

            
              dosattack
              17
              0
              2

            
              crackingenc
              16
              0
              2

            
              eavesdropping
              43
              8
              3

            
              smshijacking
              31
              2
              3

            
              locationtracking
              35
              6
              3

            
              dosattack
              23
              2
              3

            
              crackingenc
              19
              1
              3

## output.txt
Data of College 1
            methods  heard  knows  college
0     eavesdropping     40      3        1
1      smshijacking     29      2        1
2  locationtracking     43      5        1
3         dosattack     19      1        1
4       crackingenc     11      0        1
Data of College 2
            methods  heard  knows  college
0     eavesdropping     41      6        2
1      smshijacking     34      3        2
2  locationtracking     40      2        2
3         dosattack     17      0        2
4       crackingenc     16      0        2
Data of College 3
            methods  heard  knows  college
0     eavesdropping     43      8        3
1      smshijacking     31      2        3
2  locationtracking     35      6        3
3         dosattack     23      2        3
4       crackingenc     19      1        3

Calculated Parameters for College 1
[0.6735     0.66746552 0.71477907 0.65113158 0.5985    ]
Calculated Parameters for College 2
[0.74484146 0.68673529 0.6485     0.5985     0.5985    ]
Calculated Parameters for College 3
[0.78454651 0.66301613 0.76992857 0.68545652 0.65113158]

Predicted Rating of College 1: 3.97
Predicted Rating of College 2: 4.03
Predicted Rating of College 3: 3.98

## rating.py
import numpy as np
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so prefer the new name when it is
# available and fall back to the legacy one on older installations.
plt.style.use(
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)

# Constant added to every knows/heard ratio in generateDatasetArray.
# NOTE(review): this file does not explain where 0.5985 comes from — confirm.
RANDOM_OFFSET = 0.5985

# Ratings assigned to colleges 1 and 2; the __main__ block uses them as the
# regression targets (y) when fitting the model.
COLLEGE_1_RATING = 3.5
COLLEGE_2_RATING = 4.5

def generateDatasetArray(data_df):
    """Build the per-row feature vector for one college.

    Each entry is ``RANDOM_OFFSET + knows / heard`` for the matching row of
    *data_df*, in row order.

    Args:
        data_df: DataFrame with numeric ``knows`` and ``heard`` columns.

    Returns:
        A 1-D float ``numpy.ndarray`` holding one value per row of *data_df*.
    """
    # Work on positional arrays so the result does not depend on the frame's
    # index labels (a filtered frame need not be re-indexed from 0 first).
    heard = data_df['heard'].to_numpy(dtype=float)
    knows = data_df['knows'].to_numpy(dtype=float)

    # Rows with heard == 0 keep a ratio of 0 instead of dividing by zero.
    ratios = np.zeros(len(data_df))
    np.divide(knows, heard, out=ratios, where=heard != 0)

    return RANDOM_OFFSET + ratios

if __name__ == '__main__':
    """
        Rate colleges on security awareness from the survey in data.csv.

        CSV columns
            - `methods`: method of intrusion
            - `heard`: has heard of the method
            - `knows`: knows the method for sure
            - `college`: college identification

        Security awareness rating
            The rows are split per college, keeping the CSV order. For every
            method the knows/heard ratio (plus RANDOM_OFFSET) is one feature.
            A ridge regression is fitted on colleges 1 and 2, whose ratings
            are given as constants, and then predicts a rating for all three.

        Parameter-wise tendency graph
            The per-method values of each college are drawn as points joined
            by a line, next to a panel listing the predicted ratings.
    """
    survey_df = pd.read_csv('data.csv')

    # One frame per college id, each re-indexed from 0.
    frames = [
        survey_df[survey_df['college'] == college_id].reset_index(drop=True)
        for college_id in (1, 2, 3)
    ]

    frame_titles = (
        "Data of College 1",
        "Data of College 2",
        "Data of College 3",
    )
    for title, frame in zip(frame_titles, frames):
        print(title)
        print(frame)
    print()

    features = [generateDatasetArray(frame) for frame in frames]

    feature_titles = (
        "Calculated Parameters for College 1",
        "Calculated Parameters for College 2",
        "Calculated Parameters for College 3",
    )
    for title, values in zip(feature_titles, features):
        print(title)
        print(values)
    print()

    # Only colleges 1 and 2 have a rating constant, so they are the training set.
    train_X = features[:2]
    train_y = [COLLEGE_1_RATING, COLLEGE_2_RATING]

    model = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
    model.fit(train_X, train_y)

    predictions = [round(rating, 2) for rating in model.predict(features)]

    rating_templates = (
        "Predicted Rating of College 1: {}",
        "Predicted Rating of College 2: {}",
        "Predicted Rating of College 3: {}",
    )
    for template, rating in zip(rating_templates, predictions):
        print(template.format(rating))
    print()

    fig, (ax, ax_text) = plt.subplots(1, 2)
    positions = np.arange(5)

    # One x tick per method, labelled with a readable method name.
    ax.set_xticks(positions)
    ax.set_xticklabels(['Call Eavesdropping', 'SMS Hijacking', 'Location Tracking', 'DOS Attack', 'Cracking Encryption'])

    # Points plus a connecting line for each college's values.
    series_styles = (
        ('red', 'College 1'),
        ('green', 'College 2'),
        ('blue', 'College 3'),
    )
    for (colour, series_name), values in zip(series_styles, features):
        ax.scatter(positions, values, color=colour)
        ax.plot(positions, values, color=colour, label=series_name)

    ax.legend()

    # Second panel: predicted ratings as plain text on a blank axes.
    ax_text.axis('off')
    ax_text.grid(False)
    text_rows = (
        (0.8, 'Ratings'),
        (0.6, f'College 1: {predictions[0]}'),
        (0.4, f'College 2: {predictions[1]}'),
        (0.2, f'College 3: {predictions[2]}'),
    )
    for y_pos, caption in text_rows:
        ax_text.text(0.2, y_pos, caption, fontsize=20)

    plt.show()
methods	heard	knows	college
eavesdropping	40	3	1
smshijacking	29	2	1
locationtracking	43	5	1
dosattack	19	1	1
crackingenc	11	0	1
eavesdropping	41	6	2
smshijacking	34	3	2
locationtracking	40	2	2
dosattack	17	0	2
crackingenc	16	0	2
eavesdropping	43	8	3
smshijacking	31	2	3
locationtracking	35	6	3
dosattack	23	2	3
crackingenc	19	1	3
	Data of College 1
	methods heard knows college
	0 eavesdropping 40 3 1
	1 smshijacking 29 2 1
	2 locationtracking 43 5 1
	3 dosattack 19 1 1
	4 crackingenc 11 0 1
	Data of College 2
	methods heard knows college
	0 eavesdropping 41 6 2
	1 smshijacking 34 3 2
	2 locationtracking 40 2 2
	3 dosattack 17 0 2
	4 crackingenc 16 0 2
	Data of College 3
	methods heard knows college
	0 eavesdropping 43 8 3
	1 smshijacking 31 2 3
	2 locationtracking 35 6 3
	3 dosattack 23 2 3
	4 crackingenc 19 1 3

	Calculated Parameters for College 1
	[0.6735 0.66746552 0.71477907 0.65113158 0.5985 ]
	Calculated Parameters for College 2
	[0.74484146 0.68673529 0.6485 0.5985 0.5985 ]
	Calculated Parameters for College 3
	[0.78454651 0.66301613 0.76992857 0.68545652 0.65113158]

	Predicted Rating of College 1: 3.97
	Predicted Rating of College 2: 4.03
	Predicted Rating of College 3: 3.98
	import numpy as np
	import pandas as pd
	from sklearn import linear_model
	import matplotlib.pyplot as plt
	# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
	# later releases dropped the old names, so prefer the new name when it is
	# available and fall back to the legacy one on older installations.
	plt.style.use(
	    'seaborn-v0_8-whitegrid'
	    if 'seaborn-v0_8-whitegrid' in plt.style.available
	    else 'seaborn-whitegrid'
	)

	# Constant added to every knows/heard ratio in generateDatasetArray.
	# NOTE(review): this file does not explain where 0.5985 comes from — confirm.
	RANDOM_OFFSET = 0.5985

	# Ratings assigned to colleges 1 and 2; the __main__ block uses them as the
	# regression targets (y) when fitting the model.
	COLLEGE_1_RATING = 3.5
	COLLEGE_2_RATING = 4.5

	def generateDatasetArray(data_df):
	    """Build the per-row feature vector for one college.

	    Each entry is ``RANDOM_OFFSET + knows / heard`` for the matching row of
	    *data_df*, in row order.

	    Args:
	        data_df: DataFrame with numeric ``knows`` and ``heard`` columns.

	    Returns:
	        A 1-D float ``numpy.ndarray`` holding one value per row of *data_df*.
	    """
	    # Work on positional arrays so the result does not depend on the frame's
	    # index labels (a filtered frame need not be re-indexed from 0 first).
	    heard = data_df['heard'].to_numpy(dtype=float)
	    knows = data_df['knows'].to_numpy(dtype=float)

	    # Rows with heard == 0 keep a ratio of 0 instead of dividing by zero.
	    ratios = np.zeros(len(data_df))
	    np.divide(knows, heard, out=ratios, where=heard != 0)

	    return RANDOM_OFFSET + ratios

	if __name__ == '__main__':
	    """
	        Rate colleges on security awareness from the survey in data.csv.

	        CSV columns
	            - `methods`: method of intrusion
	            - `heard`: has heard of the method
	            - `knows`: knows the method for sure
	            - `college`: college identification

	        Security awareness rating
	            The rows are split per college, keeping the CSV order. For every
	            method the knows/heard ratio (plus RANDOM_OFFSET) is one feature.
	            A ridge regression is fitted on colleges 1 and 2, whose ratings
	            are given as constants, and then predicts a rating for all three.

	        Parameter-wise tendency graph
	            The per-method values of each college are drawn as points joined
	            by a line, next to a panel listing the predicted ratings.
	    """
	    survey_df = pd.read_csv('data.csv')

	    # One frame per college id, each re-indexed from 0.
	    frames = [
	        survey_df[survey_df['college'] == college_id].reset_index(drop=True)
	        for college_id in (1, 2, 3)
	    ]

	    frame_titles = (
	        "Data of College 1",
	        "Data of College 2",
	        "Data of College 3",
	    )
	    for title, frame in zip(frame_titles, frames):
	        print(title)
	        print(frame)
	    print()

	    features = [generateDatasetArray(frame) for frame in frames]

	    feature_titles = (
	        "Calculated Parameters for College 1",
	        "Calculated Parameters for College 2",
	        "Calculated Parameters for College 3",
	    )
	    for title, values in zip(feature_titles, features):
	        print(title)
	        print(values)
	    print()

	    # Only colleges 1 and 2 have a rating constant, so they are the training set.
	    train_X = features[:2]
	    train_y = [COLLEGE_1_RATING, COLLEGE_2_RATING]

	    model = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
	    model.fit(train_X, train_y)

	    predictions = [round(rating, 2) for rating in model.predict(features)]

	    rating_templates = (
	        "Predicted Rating of College 1: {}",
	        "Predicted Rating of College 2: {}",
	        "Predicted Rating of College 3: {}",
	    )
	    for template, rating in zip(rating_templates, predictions):
	        print(template.format(rating))
	    print()

	    fig, (ax, ax_text) = plt.subplots(1, 2)
	    positions = np.arange(5)

	    # One x tick per method, labelled with a readable method name.
	    ax.set_xticks(positions)
	    ax.set_xticklabels(['Call Eavesdropping', 'SMS Hijacking', 'Location Tracking', 'DOS Attack', 'Cracking Encryption'])

	    # Points plus a connecting line for each college's values.
	    series_styles = (
	        ('red', 'College 1'),
	        ('green', 'College 2'),
	        ('blue', 'College 3'),
	    )
	    for (colour, series_name), values in zip(series_styles, features):
	        ax.scatter(positions, values, color=colour)
	        ax.plot(positions, values, color=colour, label=series_name)

	    ax.legend()

	    # Second panel: predicted ratings as plain text on a blank axes.
	    ax_text.axis('off')
	    ax_text.grid(False)
	    text_rows = (
	        (0.8, 'Ratings'),
	        (0.6, f'College 1: {predictions[0]}'),
	        (0.4, f'College 2: {predictions[1]}'),
	        (0.2, f'College 3: {predictions[2]}'),
	    )
	    for y_pos, caption in text_rows:
	        ax_text.text(0.2, y_pos, caption, fontsize=20)

	    plt.show()