Created
August 7, 2023 08:51
-
-
Save defrindr/53602b29115837f298215f8ffcb0c7a3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import mysql.connector

# Script overview:
#   1. Read labelled rows from the `eval_opinion` table of a MySQL database.
#   2. Train a bag-of-words Multinomial Naive Bayes classifier on the rows
#      with id <= 100.
#   3. Predict labels for the rows with id > 100 and compare them with the
#      label stored in the `training` column.
#   4. Print the number of training rows, evaluation rows, correct
#      predictions, and the resulting accuracy.

# MySQL connection settings.
config = {
    'user': 'root',
    'password': '',
    'host': 'localhost',
    'database': 'db_sentimen',
    'port': '3306'  # replace with your MySQL port (usually 3306)
}

# Open the connection and fetch both row sets up front so the cursor and
# the connection are always released, even if a query fails.
conn = mysql.connector.connect(**config)
try:
    cursor = conn.cursor()
    try:
        # Training rows: (content, training) pairs.
        query = 'SELECT content, training FROM eval_opinion where id <= 100'
        cursor.execute(query)
        train_rows = cursor.fetchall()

        # Evaluation rows: same columns, remaining ids.
        query = 'SELECT content, training FROM eval_opinion where id > 100'
        cursor.execute(query)
        test_rows = cursor.fetchall()
    finally:
        cursor.close()
finally:
    conn.close()

counts = len(train_rows)
sentences = [row[0] for row in train_rows]
labels = [row[1] for row in train_rows]

# Example of the expected shape of the training data:
# sentences = [
#     "Produk ini sangat bagus!",
#     "Saya puas dengan layanan pelanggan mereka.",
#     "Harga produk ini terlalu mahal.",
#     "Pengalaman buruk dengan produk ini.",
#     "Saya merasa biasa saja tentang produk ini.",
# ]
# labels = ['positif', 'positif', 'negatif', 'negatif', 'netral']

# Turn the raw text into token-count vectors.
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(sentences)

# Train the Naive Bayes model.
classifier = MultinomialNB()
classifier.fit(X_train_counts, labels)

# Texts to classify and the labels they are expected to receive.
count_tests = len(test_rows)
input_text = [row[0] for row in test_rows]
training_result = [row[1] for row in test_rows]

if count_tests:
    # Vectorize with the vocabulary learned from the training rows, then
    # predict a label for every evaluation text.
    X_input_counts = vectorizer.transform(input_text)
    predicted_sentiments = classifier.predict(X_input_counts)
else:
    # Nothing to evaluate: predicting on zero samples would raise, so skip it.
    predicted_sentiments = []

# Count the predictions that match the stored label.
valid = sum(
    1
    for expected, predicted in zip(training_result, predicted_sentiments)
    if expected == predicted
)

# Accuracy is undefined without evaluation rows; report NaN rather than
# dividing by zero.
accuracy = valid / count_tests if count_tests else float('nan')

print(f'Data Training = {counts}')
print(f'Data Uji Coba = {count_tests}')
print(f'Data Valid = {valid}')
print(f'Akurasi = {accuracy}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment