Lakshmi-1212

## LP_prob.py
# Write the `prob` object (defined outside this snippet) to standard output.
# NOTE(review): given the snippet name this is presumably a linear-programming
# problem object -- confirm against the code that builds it.
print(prob)

## RandomForestClassifier.py
from sklearn.ensemble import RandomForestClassifier
# r2_score stays imported in case other snippets still reference it
from sklearn.metrics import accuracy_score, r2_score

# Create a random forest classifier (default hyper-parameters) and fit it on
# the training data
rfmodel = RandomForestClassifier().fit(X_train, y_train)

# Predict the class labels for the test dataset
y_pred_rf = rfmodel.predict(X_test)

# Score the classifier with accuracy (fraction of correctly predicted labels).
# R^2 is a regression metric and is not meaningful for predicted class labels.
accuracy_score(y_test, y_pred_rf)

## LimeExplainer.py
from lime.lime_tabular import LimeTabularExplainer
# Create the LIME explainer for tabular data in classification mode.
# The class names are read from the fitted random forest (rfmodel), the same
# model whose predict_proba output is explained later; the original referenced
# an undefined name `model`.
explainer = LimeTabularExplainer(
    X_train.values,
    mode='classification',
    feature_names=X_train.columns,
    class_names=rfmodel.classes_,
)

## Explain_Observation1.py
# Explanation from the LIME model for the test row at positional index 1
observation = 1


# Prediction function handed to LIME
def predict_fn(x):
    """Return rfmodel's class probabilities for `x`, cast to float."""
    return rfmodel.predict_proba(x).astype(float)


# Explain the prediction using the explainer and prediction function that are
# actually defined (`explainer`, `predict_fn`); the original call referenced
# the undefined names `rfexplainer` and `rfpredict_fn`.
lime_explanation = explainer.explain_instance(
    X_test.values[observation], predict_fn, num_features=10
)

# Show the explanation in the notebook
lime_explanation.show_in_notebook(show_table=True)

## batsman.py
# Group the ball-by-ball records by batsman
batgroup = balldf.groupby(['batsman'])

# Create a batting dataframe with summary statistics for each batsman,
# starting from the number of 'ball' entries recorded per batsman
batdf = pd.DataFrame(batgroup['ball'].count()).rename(columns={'ball': 'balls_faced'})
# Number of distinct 'id' values per batsman (presumably match ids -- confirm)
batdf['innings'] = batgroup['id'].nunique()
batdf['runs'] = batgroup['batsman_runs'].sum()

# Boundary counts. Batsmen who never hit a 4 (or 6) are absent from the
# filtered groupby result and get NaN when the column is aligned on the index,
# so both columns are filled with 0 afterwards. The filled column is assigned
# back instead of calling fillna(inplace=True) on batdf[...]: that chained
# in-place call does not update batdf under pandas Copy-on-Write.
batdf['4s'] = balldf[balldf['batsman_runs'] == 4].groupby('batsman')['batsman_runs'].count()
batdf['4s'] = batdf['4s'].fillna(0)
batdf['6s'] = balldf[balldf['batsman_runs'] == 6].groupby('batsman')['batsman_runs'].count()
batdf['6s'] = batdf['6s'].fillna(0)

## bowlers.py
# Group the ball-by-ball records by bowler
bowlgroup = balldf.groupby(['bowler'])

# Create a bowling dataframe (bowldf) with summary statistics for each bowler,
# starting from the number of 'ball' entries recorded per bowler
bowldf = pd.DataFrame(bowlgroup['ball'].count()).rename(columns={'ball': 'balls_bowled'})

# Get no. of wickets taken by each bowler: only the dismissal kinds listed
# below are counted
bwl_wkts = balldf[balldf['dismissal_kind'].isin(
    ['caught', 'bowled', 'lbw', 'stumped', 'caught and bowled', 'hit wicket']
)]
bowldf['wickets'] = bwl_wkts.groupby(['bowler'])['ball'].count()
# Bowlers without any counted dismissal get NaN from index alignment; fill
# with 0. Assign the result back rather than using fillna(inplace=True) on
# bowldf[...], which does not update bowldf under pandas Copy-on-Write.
bowldf['wickets'] = bowldf['wickets'].fillna(0)

## all_players.py
# Build a dataframe listing every player, indexed by player name
all_players_dict = {}
# Run update_player_list (defined elsewhere) on every row of balldf;
# presumably it fills all_players_dict as a side effect -- confirm
out_temp = balldf.apply(update_player_list, axis=1)

# One row per key of all_players_dict; 'matches' is the length of the
# collection stored under that key
player_names = list(all_players_dict)
match_counts = [len(all_players_dict[name]) for name in player_names]
all_df = pd.DataFrame({'Players': player_names, 'matches': match_counts}).set_index('Players')

## merge_data.py
# Combine the all-players, batting and bowling dataframes into one players
# dataframe, outer-joining on the index so no player is dropped
players = pd.merge(all_df, batdf, left_index=True, right_index=True, how='outer')
players = pd.merge(players, bowldf, left_index=True, right_index=True, how='outer')

# Number of player-of-the-match awards from the matches file.
# The counts are explicitly named 'player_of_match': pandas >= 2.0 names the
# value_counts() result 'count', which would otherwise make the column lookup
# below fail with a KeyError.
motm_counts = matches['player_of_match'].value_counts().rename('player_of_match')
players = pd.merge(players, motm_counts, left_index=True, right_index=True, how='left')
# Players who never won the award have no match in the left join; set them to 0
players['player_of_match'] = players['player_of_match'].fillna(0)

## kmeans.py
from sklearn.cluster import KMeans

# Create the KMeans object for 5 clusters and at most 50 iterations
kmeans = KMeans(n_clusters=5, max_iter=50)

# Fit the k-means algorithm on the given scaled players data
kmeans.fit(players_scaled)

# Assign the cluster labels to an output column in the original players
# dataframe. The column key must be the string "cluster_kmeans"; the bare
# name cluster_kmeans is not defined and raised a NameError.
players["cluster_kmeans"] = kmeans.labels_

## kmeans_clustering.py
from sklearn.cluster import KMeans

# Build a KMeans model with 4 clusters and max_iter=50, and fit it on the
# scaled players data (fit returns the fitted estimator itself)
kmeans = KMeans(n_clusters=4, max_iter=50).fit(players_scaled)

# Store the cluster label of each row in the original players dataframe
players["cluster_kmeans"] = kmeans.labels_
	from sklearn.ensemble import RandomForestClassifier
	# r2_score stays imported in case other snippets still reference it
	from sklearn.metrics import accuracy_score, r2_score

	# Create a random forest classifier (default hyper-parameters) and fit it on
	# the training data
	rfmodel = RandomForestClassifier().fit(X_train, y_train)

	# Predict the class labels for the test dataset
	y_pred_rf = rfmodel.predict(X_test)

	# Score the classifier with accuracy (fraction of correctly predicted labels).
	# R^2 is a regression metric and is not meaningful for predicted class labels.
	accuracy_score(y_test, y_pred_rf)
	from lime.lime_tabular import LimeTabularExplainer
	# Create the LIME explainer for tabular data in classification mode.
	# The class names are read from the fitted random forest (rfmodel), the same
	# model whose predict_proba output is explained later; the original referenced
	# an undefined name `model`.
	explainer = LimeTabularExplainer(
		X_train.values,
		mode='classification',
		feature_names=X_train.columns,
		class_names=rfmodel.classes_,
	)
	# Explanation from the LIME model for the test row at positional index 1
	observation = 1

	# Prediction function handed to LIME
	def predict_fn(x):
		"""Return rfmodel's class probabilities for `x`, cast to float."""
		return rfmodel.predict_proba(x).astype(float)

	# Explain the prediction using the explainer and prediction function that are
	# actually defined (`explainer`, `predict_fn`); the original call referenced
	# the undefined names `rfexplainer` and `rfpredict_fn`.
	lime_explanation = explainer.explain_instance(
		X_test.values[observation], predict_fn, num_features=10
	)

	# Show the explanation in the notebook
	lime_explanation.show_in_notebook(show_table=True)
	# Group the ball-by-ball records by batsman
	batgroup = balldf.groupby(['batsman'])

	# Create a batting dataframe with summary statistics for each batsman,
	# starting from the number of 'ball' entries recorded per batsman
	batdf = pd.DataFrame(batgroup['ball'].count()).rename(columns={'ball': 'balls_faced'})
	# Number of distinct 'id' values per batsman (presumably match ids -- confirm)
	batdf['innings'] = batgroup['id'].nunique()
	batdf['runs'] = batgroup['batsman_runs'].sum()

	# Boundary counts. Batsmen who never hit a 4 (or 6) are absent from the
	# filtered groupby result and get NaN when the column is aligned on the index,
	# so both columns are filled with 0 afterwards. The filled column is assigned
	# back instead of calling fillna(inplace=True) on batdf[...]: that chained
	# in-place call does not update batdf under pandas Copy-on-Write.
	batdf['4s'] = balldf[balldf['batsman_runs'] == 4].groupby('batsman')['batsman_runs'].count()
	batdf['4s'] = batdf['4s'].fillna(0)
	batdf['6s'] = balldf[balldf['batsman_runs'] == 6].groupby('batsman')['batsman_runs'].count()
	batdf['6s'] = batdf['6s'].fillna(0)
	# Group the ball-by-ball records by bowler
	bowlgroup = balldf.groupby(['bowler'])

	# Create a bowling dataframe (bowldf) with summary statistics for each bowler,
	# starting from the number of 'ball' entries recorded per bowler
	bowldf = pd.DataFrame(bowlgroup['ball'].count()).rename(columns={'ball': 'balls_bowled'})

	# Get no. of wickets taken by each bowler: only the dismissal kinds listed
	# below are counted
	bwl_wkts = balldf[balldf['dismissal_kind'].isin(
		['caught', 'bowled', 'lbw', 'stumped', 'caught and bowled', 'hit wicket']
	)]
	bowldf['wickets'] = bwl_wkts.groupby(['bowler'])['ball'].count()
	# Bowlers without any counted dismissal get NaN from index alignment; fill
	# with 0. Assign the result back rather than using fillna(inplace=True) on
	# bowldf[...], which does not update bowldf under pandas Copy-on-Write.
	bowldf['wickets'] = bowldf['wickets'].fillna(0)
	# Build a dataframe listing every player, indexed by player name
	all_players_dict = {}
	# Run update_player_list (defined elsewhere) on every row of balldf;
	# presumably it fills all_players_dict as a side effect -- confirm
	out_temp = balldf.apply(update_player_list, axis=1)

	# One row per key of all_players_dict; 'matches' is the length of the
	# collection stored under that key
	player_names = list(all_players_dict)
	match_counts = [len(all_players_dict[name]) for name in player_names]
	all_df = pd.DataFrame({'Players': player_names, 'matches': match_counts}).set_index('Players')
	# Combine the all-players, batting and bowling dataframes into one players
	# dataframe, outer-joining on the index so no player is dropped
	players = pd.merge(all_df, batdf, left_index=True, right_index=True, how='outer')
	players = pd.merge(players, bowldf, left_index=True, right_index=True, how='outer')

	# Number of player-of-the-match awards from the matches file.
	# The counts are explicitly named 'player_of_match': pandas >= 2.0 names the
	# value_counts() result 'count', which would otherwise make the column lookup
	# below fail with a KeyError.
	motm_counts = matches['player_of_match'].value_counts().rename('player_of_match')
	players = pd.merge(players, motm_counts, left_index=True, right_index=True, how='left')
	# Players who never won the award have no match in the left join; set them to 0
	players['player_of_match'] = players['player_of_match'].fillna(0)
	from sklearn.cluster import KMeans

	# Create the KMeans object for 5 clusters and at most 50 iterations
	kmeans = KMeans(n_clusters=5, max_iter=50)

	# Fit the k-means algorithm on the given scaled players data
	kmeans.fit(players_scaled)

	# Assign the cluster labels to an output column in the original players
	# dataframe. The column key must be the string "cluster_kmeans"; the bare
	# name cluster_kmeans is not defined and raised a NameError.
	players["cluster_kmeans"] = kmeans.labels_
	from sklearn.cluster import KMeans

	# Build a KMeans model with 4 clusters and max_iter=50, and fit it on the
	# scaled players data (fit returns the fitted estimator itself)
	kmeans = KMeans(n_clusters=4, max_iter=50).fit(players_scaled)

	# Store the cluster label of each row in the original players dataframe
	players["cluster_kmeans"] = kmeans.labels_