Temidayo Omoniyi kiddojazz

## addcolumn.py
# Add a new column named 'Prediction' by mapping each value in 'labels' through label_map
df['Prediction'] = df['labels'].map(label_map)
print(df)

## mapping.py
# Numeric class id -> category name; ids follow the order of the list below.
label_map = dict(enumerate(
    ["Business", "Entertainment", "Politics", "Sport", "Tech"]
))

## unknowncategorydataframe.py
# Run predict_category on every unlabelled item, then store each input next
# to its predicted label and write the table to CSV (without the index column).
unlabelled_predictions = [predict_category(data) for data in unlabelled_data]
prediction_df = pd.DataFrame({
    "data": unlabelled_data,
    "labels": unlabelled_predictions,
})
prediction_df.to_csv("model_prediction.csv", index=False)

## unknownurl.py
# Load the CSV referenced by url_unknown and preview its first five rows.
url_unknown = 'https://github.com/kiddojazz/Multitext-Classification/blob/master/bbc_data.csv?raw=true'
# Read from url_unknown (the name defined on the line above), not a separate `url` variable.
df_unknown = pd.read_csv(url_unknown)
print(df_unknown.head(5))

## createconfucsionmatrix.py
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
# Draw the confusion matrix of test_labels vs. y_pred as an annotated heatmap.
# The same category names label both axes.
category_names = ["Business", "Entertainment", "Politics", "Sport", "Tech"]
confusion = confusion_matrix(test_labels, y_pred)
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)
sns.heatmap(
    confusion,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    square=True,
    xticklabels=category_names,
    yticklabels=category_names,
)
plt.xlabel('Predicted')

## categoryfunction.py
def predict_category(text):
    """Return the predicted class index for one piece of text.

    The text is encoded with ``loaded_tokenizer`` (truncation and padding
    enabled, TensorFlow tensors), passed through ``loaded_model``, and the
    position of the largest value along axis 1 of the model's first output
    is returned for the first row.

    Args:
        text: The text to classify.

    Returns:
        The arg-max index for the first row, as produced by
        ``tf.argmax(...).numpy()[0]``.
    """
    predict_input = loaded_tokenizer.encode(
        text,
        truncation=True,
        padding=True,
        return_tensors="tf",
    )
    # Only the first element of the model output is used
    # (presumably the per-class logits -- confirm against the model class).
    output = loaded_model(predict_input)[0]
    prediction_value = tf.argmax(output, axis=1).numpy()[0]
    return prediction_value


# - - - - - - - - - - - - - - - - - - - - - - - - - - -
y_pred = []

## categorizeprediction.py
# Encode test_text, run the model, and take the arg-max class index.
predict_input = loaded_tokenizer.encode(
    test_text,
    truncation=True,
    padding=True,
    return_tensors="tf",
)

output = loaded_model(predict_input)[0]

prediction_value = tf.argmax(output, axis=1).numpy()[0]

# Convert numeric prediction to category label
# NOTE(review): the conversion step itself is not present in this snippet.

## testmodel.py
# Class index meaning: Business = 0, Entertainment = 1, Politics = 2, Sport = 3, Tech = 4
# Encode test_text with truncation and padding enabled, returning TensorFlow tensors.
predict_input = loaded_tokenizer.encode(test_text,
truncation=True,
padding=True,
return_tensors="tf")

# Keep only the first element of the model output (presumably the logits -- confirm).
output = loaded_model(predict_input)[0]

# Index of the largest value along axis 1, taken for the first row.
prediction_value = tf.argmax(output, axis=1).numpy()[0]
# Bare expression: shows the value when it is the last line of a notebook cell.
prediction_value

## loadsavemodel.py
# Load the tokenizer and the classification model from the same directory.
# NOTE(review): this name differs from the "Multitext_Classification_colab"
# directory used by the save snippet -- confirm the files exist here.
save_directory = "Multitext_Classification_2"
loaded_tokenizer = DistilBertTokenizer.from_pretrained(save_directory)
loaded_model = TFDistilBertForSequenceClassification.from_pretrained(save_directory)

## savemodel.py
from tensorflow.keras.models import load_model
# Directory that receives both the model and the tokenizer files.
save_directory = "Multitext_Classification_colab" # Change this to your preferred location

# Save model and tokenizer side by side so both can be loaded from one path.
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)
	# Add a new column named 'Prediction' by mapping each value in 'labels' through label_map
	df['Prediction'] = df['labels'].map(label_map)
	print(df)
	# Numeric class id -> category name; ids follow the order of the list below.
	label_map = dict(enumerate(
		["Business", "Entertainment", "Politics", "Sport", "Tech"]
	))
	# Run predict_category on every unlabelled item, then store each input next
	# to its predicted label and write the table to CSV (without the index column).
	unlabelled_predictions = [predict_category(data) for data in unlabelled_data]
	prediction_df = pd.DataFrame({
		"data": unlabelled_data,
		"labels": unlabelled_predictions,
	})
	prediction_df.to_csv("model_prediction.csv", index=False)
	# Load the CSV referenced by url_unknown and preview its first five rows.
	url_unknown = 'https://github.com/kiddojazz/Multitext-Classification/blob/master/bbc_data.csv?raw=true'
	# Read from url_unknown (the name defined on the line above), not a separate `url` variable.
	df_unknown = pd.read_csv(url_unknown)
	print(df_unknown.head(5))
	from sklearn.metrics import confusion_matrix
	from sklearn.metrics import classification_report
	import matplotlib.pyplot as plt
	import seaborn as sns
	# Draw the confusion matrix of test_labels vs. y_pred as an annotated heatmap.
	# The same category names label both axes.
	category_names = ["Business", "Entertainment", "Politics", "Sport", "Tech"]
	confusion = confusion_matrix(test_labels, y_pred)
	plt.figure(figsize=(8, 6))
	sns.set(font_scale=1.2)
	sns.heatmap(
		confusion,
		annot=True,
		fmt="d",
		cmap="Blues",
		cbar=False,
		square=True,
		xticklabels=category_names,
		yticklabels=category_names,
	)
	plt.xlabel('Predicted')
	def predict_category(text):
		"""Return the predicted class index for one piece of text.

		The text is encoded with ``loaded_tokenizer`` (truncation and padding
		enabled, TensorFlow tensors), passed through ``loaded_model``, and the
		position of the largest value along axis 1 of the model's first output
		is returned for the first row.

		Args:
			text: The text to classify.

		Returns:
			The arg-max index for the first row, as produced by
			``tf.argmax(...).numpy()[0]``.
		"""
		predict_input = loaded_tokenizer.encode(
			text,
			truncation=True,
			padding=True,
			return_tensors="tf",
		)
		# Only the first element of the model output is used
		# (presumably the per-class logits -- confirm against the model class).
		output = loaded_model(predict_input)[0]
		prediction_value = tf.argmax(output, axis=1).numpy()[0]
		return prediction_value


	# - - - - - - - - - - - - - - - - - - - - - - - - - - -
	y_pred = []
	# Class index meaning: Business = 0, Entertainment = 1, Politics = 2, Sport = 3, Tech = 4
	# Encode test_text with truncation and padding enabled, returning TensorFlow tensors.
	predict_input = loaded_tokenizer.encode(test_text,
	truncation=True,
	padding=True,
	return_tensors="tf")

	# Keep only the first element of the model output (presumably the logits -- confirm).
	output = loaded_model(predict_input)[0]

	# Index of the largest value along axis 1, taken for the first row.
	prediction_value = tf.argmax(output, axis=1).numpy()[0]
	# Bare expression: shows the value when it is the last line of a notebook cell.
	prediction_value
	# Load the tokenizer and the classification model from the same directory.
	# NOTE(review): this name differs from the "Multitext_Classification_colab"
	# directory used by the save snippet -- confirm the files exist here.
	save_directory = "Multitext_Classification_2"
	loaded_tokenizer = DistilBertTokenizer.from_pretrained(save_directory)
	loaded_model = TFDistilBertForSequenceClassification.from_pretrained(save_directory)
	from tensorflow.keras.models import load_model
	# Directory that receives both the model and the tokenizer files.
	save_directory = "Multitext_Classification_colab" # Change this to your preferred location

	# Save model and tokenizer side by side so both can be loaded from one path.
	model.save_pretrained(save_directory)
	tokenizer.save_pretrained(save_directory)