# Source: GitHub Gist by @HundSimon
# (gist id 125337805e4a2862f58f49cbd2c0b1f8), last active June 11, 2024 07:18.
import pandas as pd
import numpy as np
from scipy.stats import entropy
def calculate_entropy(data, n):
    """Return the Shannon entropy of ``data`` rescaled by ``1 / ln(n)``.

    ``scipy.stats.entropy`` normalises ``data`` so it sums to 1 before
    computing the (natural-log) entropy, so raw non-negative values may be
    passed directly. Dividing by ``ln(n)`` maps the result into ``[0, 1]``
    when ``data`` has ``n`` entries, because ``ln(n)`` is the largest
    entropy a distribution over ``n`` outcomes can have.

    Args:
        data: 1-D array-like of non-negative values.
        n: Number of observations used for the rescaling; must be > 1.

    Returns:
        The rescaled entropy.

    Raises:
        ValueError: If ``n`` is not greater than 1, since ``ln(n)`` would be
            zero or negative and the rescaling would produce inf/NaN or a
            sign-flipped value.
    """
    if n <= 1:
        raise ValueError(f"n must be greater than 1 to normalise entropy, got {n}")
    return entropy(data) / np.log(n)
def calculate_weights(entropy_list):
    """Convert per-indicator entropies into normalised entropy weights.

    Each weight is ``1 - entropy`` for that indicator divided by the sum of
    ``1 - entropy`` over all indicators, so the returned weights sum to 1
    and lower-entropy indicators receive larger weights.

    Args:
        entropy_list: Iterable of entropy values (Python floats or NumPy
            scalars).

    Returns:
        1-D ``numpy.ndarray`` of weights in the same order as
        ``entropy_list``. If every entropy equals 1 the total is zero and
        the result contains NaN.
    """
    # Build an ndarray up front: dividing a plain Python list by a scalar
    # only works by accident when the elements are NumPy scalars, and raises
    # TypeError for ordinary floats.
    diversification = 1.0 - np.asarray(list(entropy_list), dtype=float)
    return diversification / diversification.sum()
def calculate_scores(data, weight_list):
    """Return the weighted sum of each row of ``data``.

    For every row the score is ``sum(weight_list[j] * row[j])`` taken over
    the indices of ``weight_list``; columns beyond ``len(weight_list)`` are
    not used.

    Args:
        data: 2-D array-like (NumPy array or nested sequences), one row per
            item to score.
        weight_list: Sequence of weights, indexed by column position.

    Returns:
        List with one score per row of ``data``, in row order.

    Raises:
        IndexError: If ``weight_list`` has more entries than a row has
            columns.
    """
    # np.asarray leaves an ndarray untouched and lets nested lists be scored
    # too; the original ``data[i, j]`` tuple indexing rejected them.
    matrix = np.asarray(data)
    n_weights = len(weight_list)
    return [
        sum(weight_list[j] * row[j] for j in range(n_weights))
        for row in matrix
    ]
# Load data: rows are indexed by the 'Name' column, every remaining column is
# treated as an indicator.
df = pd.read_excel('data.xlsx')
df.set_index('Name', inplace=True)
indicators = df.columns.tolist()
roller_coasters = df.index.tolist()
data = df.values
n = data.shape[0]

# Calculate the rescaled entropy of every indicator column
entropy_list = [calculate_entropy(data[:, i], n) for i in range(len(indicators))]

# Turn the entropies into weights before printing: the message below is
# labelled "Entropy weight", so it must show the weight, not the raw entropy.
weight_list = calculate_weights(entropy_list)

# Print Entropy weights
for indicator, weight in zip(indicators, weight_list):
    print(f"Entropy weight for {indicator}: {weight}")

# Calculate and print overall scores
scores = calculate_scores(data, weight_list)
for name, score_i in zip(roller_coasters, scores):
    print(f"Scores for {name}: {score_i}")

# Get and print top ten roller coasters: argsort is ascending, so take the
# last ten positions and reverse them to list the highest score first.
best_roller_coasters_indices = np.argsort(scores)[-10:][::-1]
best_roller_coasters = [roller_coasters[i] for i in best_roller_coasters_indices]
print("Top 10::", best_roller_coasters)
import numpy as np
from sklearn.linear_model import LinearRegression
import pandas as pd
file_path = 'data.xlsx'
df = pd.read_excel(file_path)

# Y: regression target
y_column_data = df['Drop (feet)']
y_data_list = y_column_data.tolist()

# X: one list of values per feature column
column_names = ['Height (feet)', 'Speed (mph)', 'Length (feet)']
x_data_list = [df[column_name].tolist() for column_name in column_names]

# Model
# The lists above are stacked as (n_features, n_samples); scikit-learn expects
# (n_samples, n_features), so transpose before fitting.
# NOTE(review): rows with missing values are not dropped here; LinearRegression
# rejects NaN input, so confirm the sheet has no gaps in these columns.
x_data_list, y_data_list = np.array(x_data_list).T, np.array(y_data_list)
model = LinearRegression().fit(x_data_list, y_data_list)
# R^2 on the training data; the original scored undefined names `x` and `y`.
r_sq = model.score(x_data_list, y_data_list)
import pandas as pd
from sklearn.linear_model import LinearRegression
# Data: load the sheet and keep only the rows that have a 'Drop (feet)' value
df = pd.read_excel('data.xlsx')
df_clean = df.dropna(subset=['Drop (feet)'])
y = df_clean['Drop (feet)']
X = df_clean[['Height (feet)', 'Speed (mph)', 'Length (feet)']]

# Model: fit() returns the estimator itself, so construct and fit in one step.
# The raw array is passed so the model is trained without column names and
# can later be given a plain nested list.
regression = LinearRegression().fit(X.values, y)

# Test: predict the drop for a single hand-entered set of feature values,
# given in the same column order as X
height_input, speed_input, length_input = 98.4, 45, 2788.8
predicted_drop = regression.predict(
    [[height_input, speed_input, length_input]]
)[0]
print(predicted_drop)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.