RayendraSabandar/README.md

## README.md

      
    Raw
  

              README.md
            
          
    How to run the code


Create a local virtual environment (replace {{env_name}} with a name of your choice)

python -m venv {{env_name}}


Activate the env

source {{env_name}}/bin/activate 


Install required packages

pip install -r requirements.txt


Train the model (copy example.env to .env first; train.py reads its settings from the environment)

python train.py


Run as API

python app.py


Chat in the terminal (type exit to quit)

python chat.py


## app.py
from flask import Flask, request, jsonify
from chat import get_response
from flask_cors import CORS

# Create the Flask application object and wrap it with flask_cors' CORS().
app=Flask(__name__)
CORS(app)

@app.get("/ping")
def ping():
    """Health-check endpoint: reply with the fixed JSON ``{"message": "pong"}``."""
    return jsonify({"message": "pong"})

@app.post("/predict")
def predict():
    """Answer a chat message posted as JSON.

    Expects a JSON body of the form ``{"message": "<text>"}`` and replies
    with ``{"answer": "<bot reply>"}``.  A body that is not a JSON object,
    or whose ``message`` is missing, not a string, or blank, gets a 400
    response carrying an ``error`` field instead of crashing inside the
    tokenizer.
    """
    # silent=True makes get_json() return None for a missing or malformed
    # JSON body rather than raising, so every bad input reaches one check.
    payload = request.get_json(silent=True)
    text = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "'message' must be a non-empty string"}), 400

    answer = get_response(text)
    return jsonify({"answer": answer})

# Start the server on port 8000 only when this file is run as a script.
# NOTE(review): debug=True is meant for local development — confirm it is
# switched off before exposing this service publicly.
if __name__=="__main__":
    app.run(port=8000, debug=True)


## chat.py
import random
import json
import torch
from model import NeuralNet
from nltk_utils import bag_of_words, tokenize

# Prefer the GPU when CUDA is available, otherwise use the CPU.  The device
# type must be spelled 'cuda': an unknown name makes torch.device() raise,
# and that branch is taken exactly on machines that do have CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Intent definitions: each entry carries a "tag" and its candidate "responses".
with open('intents.json', 'r') as f:
    intents = json.load(f)

# Checkpoint holding the model weights plus the vocabulary and tag list.
# map_location remaps the stored tensors onto the device chosen above, so a
# checkpoint saved on a GPU machine still loads on a CPU-only one.
FILE = "data.pth"
data = torch.load(FILE, map_location=device)

input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data["all_words"]
tags = data["tags"]
model_state = data["model_state"]

# Rebuild the network with the saved dimensions, restore its weights and
# switch it to evaluation mode for inference.
model = NeuralNet(input_size, hidden_size, output_size)
model.load_state_dict(model_state)
model.eval()

bot_name = "Gudeg"

def get_response(message):
    """Return the bot's reply to *message*.

    The message is tokenized, encoded as a bag-of-words vector over
    ``all_words`` and classified by ``model``.  When the most likely tag has
    a softmax probability above 0.98, a random response of the intent with
    that tag is returned; in every other case the fallback sentence is
    returned, so the function always yields a string.
    """
    fallback = "I do not understand..."

    sentence = tokenize(message)
    x = bag_of_words(sentence, all_words)
    x = x.reshape(1, x.shape[0])  # one sample -> batch with a single row
    x = torch.from_numpy(x)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(x)
        probs = torch.softmax(output, dim=1)

    # softmax is monotonic, so the most probable class is also the arg-max
    # of the raw outputs.
    prob, predicted = torch.max(probs, dim=1)
    tag = tags[predicted.item()]

    if prob.item() > 0.98:
        for intent in intents["intents"]:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])

    # Low confidence, or no intent carries the predicted tag.
    return fallback

if __name__=="__main__":
    # Terminal chat: keep answering until the user types "exit".
    while (sentence := input('You: ')) != "exit":
        resp = get_response(sentence)
        print(f"{bot_name}: {resp}")

## example.env
IGNORE_CHARACTERS=['?', '!', '.', ',']
BATCH_SIZE=1
HIDDEN_SIZE=37
LEARNING_RATE=0.001
NUM_EPOCHS=1000
DATA_PATH="data.pth"
PORT=8000

## intents.json
{
    "intents": [
        {
            "tag": "greetings",
            "patterns": [
                "Hi",
                "Hey",
                "How are you",
                "Is anyone there",
                "Hello",
                "What's up"
            ],
            "responses": [
                "Hey",
                "Hello, thanks for visiting",
                "Hi there, what can I do for you",
                "Hi there, how can I help"
            ]
        },
        {
            "tag": "products",
            "patterns": [
                "What food do you have",
                "What can I eat",
                "What do you sell",
                "What are your products",
                "What can I buy",
                "Tell me about your menu",
                "Are there any food there"
            ],
            "responses": [
                "We sell javanese traditional cuisines",
                "At Gudeg Martinah, we specialize in Javanese traditional cuisines.",
                "Gudeg, Krecek, Ayam Opor, Telur Bebek, are the main dishes that we have.",
                "Our menu features delicious dishes such as Gudeg, Krecek, Ayam Opor, and Telur Bebek.",
                "For a delightful experience, try our signature dish, Gudeg, or explore other favorites like Krecek and Ayam Opor."
            ]
        },
        {
            "tag": "opening_hours",
            "patterns": [
                "When are you open",
                "What are your opening hours",
                "Tell me your business hours",
                "When do you open and close",
                "Tell me when I can visit",
                "What time do you start serving",
                "When can I eat there"
            ],
            "responses": [
                "We open our doors at 10:00 AM and close at 9:00 PM every day. Feel free to drop by during these hours for a delightful dining experience.",
                "Our regular opening hours are from 10:00 AM to 9:00 PM, seven days a week.",
                "Our team is ready to serve you from 10:00 AM to 9:00 PM throughout the week. Visit us whenever it suits your schedule!"
            ]
        }
    ]
}

## model.py
import torch.nn as nn

class NeuralNet(nn.Module):
    """Feed-forward classifier: two ReLU hidden layers and a linear output.

    Args:
        input_size: number of features in each input row.
        hidden_size: width of both hidden layers.
        output_size: number of output classes (raw scores are returned).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw class scores for a batch ``x``.

        No softmax is applied here; callers apply it (or a loss that
        includes it) themselves.
        """
        # Each layer must consume the previous layer's activation.  Feeding
        # ``x`` into l2/l3 skipped the hidden layers entirely and only ran
        # at all when hidden_size happened to equal input_size.
        # NOTE: checkpoints trained with the old forward pass keep the same
        # parameter names and shapes but must be retrained.
        out = self.relu(self.l1(x))
        out = self.relu(self.l2(out))
        out = self.l3(out)
        return out

## nltk_utils.py
import nltk
import numpy as np
from nltk.stem.porter import PorterStemmer

# Single PorterStemmer instance shared by stem() below.
stemmer = PorterStemmer()

def tokenize(sentence):
    """Split *sentence* into tokens with ``nltk.word_tokenize``."""
    tokens = nltk.word_tokenize(sentence)
    return tokens

def stem(word):
    """Lower-case *word* and return its Porter stem."""
    lowered = word.lower()
    return stemmer.stem(lowered)

def bag_of_words(tokenized_sentence, all_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: iterable of raw word tokens; each one is stemmed
            here before the lookup.
        all_words: sequence of vocabulary words; it fixes the length and the
            ordering of the result.

    Returns:
        A ``float32`` NumPy array with ``len(all_words)`` entries: 1.0 where
        the vocabulary word occurs among the stemmed tokens, 0.0 elsewhere.
    """
    # A set gives O(1) membership tests instead of rescanning the token
    # list once per vocabulary word.
    present = {stem(w) for w in tokenized_sentence}

    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in present:
            bag[idx] = 1.0
    return bag

## requirements.txt
blinker==1.6.2
click==8.1.4
filelock==3.12.2
Flask==2.3.2
Flask-Cors==4.0.0
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.3.1
MarkupSafe==2.1.3
mpmath==1.3.0
networkx==3.1
nltk==3.8.1
numpy==1.25.1
python-dotenv==1.0.0
regex==2023.6.3
sympy==1.12
torch==2.0.1
tqdm==4.65.0
typing_extensions==4.7.1
Werkzeug==2.3.6

## train.py
import json
import os
from nltk_utils import tokenize, stem, bag_of_words
import numpy as np
from dotenv import load_dotenv
load_dotenv()

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from model import NeuralNet


import ast

with open('intents.json', 'r') as f:
    intents = json.load(f)

# Walk every intent: remember its tag, tokenize each example pattern, and
# keep both the flat token list (vocabulary) and the (tokens, tag) pairs.
all_words = []
tags = []
xy = []
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))

# IGNORE_CHARACTERS arrives from the environment as text such as
# "['?', '!', '.', ',']".  Parse it into a real set so the filter below is
# an exact token match (not a substring test against that text), and fall
# back to the same four characters when the variable is unset.
_raw_ignore = os.getenv('IGNORE_CHARACTERS')
if not _raw_ignore:
    ignore_characters = {'?', '!', '.', ','}
else:
    try:
        ignore_characters = set(ast.literal_eval(_raw_ignore))
    except (ValueError, SyntaxError, TypeError):
        # Not a Python list literal (e.g. a bare "?!.,"): ignore each
        # non-blank character individually.
        ignore_characters = {ch for ch in _raw_ignore if not ch.isspace()}

# Vocabulary: unique stems of all kept tokens, sorted for a stable order.
all_words = sorted({stem(w) for w in all_words if w not in ignore_characters})
tags = sorted(set(tags))

# One bag-of-words row per pattern and the index of its tag as the label.
x_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    bag = bag_of_words(pattern_sentence, all_words)
    x_train.append(bag)

    label = tags.index(tag)
    y_train.append(label)

x_train = np.array(x_train)
y_train = np.array(y_train)

class ChatDataset(Dataset):
    """Dataset over the module-level ``x_train`` / ``y_train`` arrays."""

    def __init__(self):
        self.x_data, self.y_data = x_train, y_train
        self.n_samples = len(x_train)

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label


# Hyper-parameters are read from the environment.  The fallbacks mirror
# example.env, so a missing variable no longer hands None to int()/float().
batch_size = int(os.getenv('BATCH_SIZE', '1'))
hidden_size = int(os.getenv('HIDDEN_SIZE', '37'))
output_size = len(tags)
input_size = len(x_train[0])
learning_rate = float(os.getenv('LEARNING_RATE', '0.001'))
num_epochs = int(os.getenv('NUM_EPOCHS', '1000'))
dataset = ChatDataset()

train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)

# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device)

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Run num_epochs passes over the training data.
for epoch in range(num_epochs):
    for batch in train_loader:
        # Move this mini-batch's inputs and labels onto the training device.
        words, labels = (tensor.to(device) for tensor in batch)

        # forward pass
        outputs = model(words)
        loss = criterion(outputs, labels.long())

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report the last mini-batch loss only on every 100th epoch.
    if (epoch + 1) % 100 != 0:
        continue
    print(f'epoch {epoch+1}/{num_epochs}, loss={loss.item():.4f}')

print(f'final loss, loss={loss.item():.4f}')

# Everything chat.py needs to rebuild the model and encode new sentences.
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "output_size": output_size,
    "hidden_size": hidden_size,
    "all_words": all_words,
    "tags": tags,
}

# Fall back to the path chat.py loads from when DATA_PATH is not set,
# instead of passing None to torch.save().
FILE = os.getenv('DATA_PATH', 'data.pth')
print(f'saving data to {FILE}')
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')
	from flask import Flask, request, jsonify
	from chat import get_response
	from flask_cors import CORS

	# Create the Flask application object and wrap it with flask_cors' CORS().
	app=Flask(__name__)
	CORS(app)

	@app.get("/ping")
	def ping():
	    """Health-check endpoint: reply with the fixed JSON ``{"message": "pong"}``."""
	    response={"message": "pong"}
	    return jsonify(response)

	@app.post("/predict")
	def predict():
	    """Answer a chat message posted as JSON.

	    Expects a JSON body of the form ``{"message": "<text>"}`` and replies
	    with ``{"answer": "<bot reply>"}``.  A body that is not a JSON object,
	    or whose ``message`` is missing, not a string, or blank, gets a 400
	    response carrying an ``error`` field instead of crashing inside the
	    tokenizer.
	    """
	    # silent=True makes get_json() return None for a missing or malformed
	    # JSON body rather than raising, so every bad input reaches one check.
	    payload = request.get_json(silent=True)
	    text = payload.get("message") if isinstance(payload, dict) else None
	    if not isinstance(text, str) or not text.strip():
	        return jsonify({"error": "'message' must be a non-empty string"}), 400

	    answer = get_response(text)
	    return jsonify({"answer": answer})

	# Start the server on port 8000 only when this file is run as a script.
	# NOTE(review): debug=True is meant for local development — confirm it is
	# switched off before exposing this service publicly.
	if __name__=="__main__":
	    app.run(port=8000, debug=True)
	import random
	import json
	import torch
	from model import NeuralNet
	from nltk_utils import bag_of_words, tokenize

	# Prefer the GPU when CUDA is available, otherwise use the CPU.  The device
	# type must be spelled 'cuda': an unknown name makes torch.device() raise,
	# and that branch is taken exactly on machines that do have CUDA.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	# Intent definitions: each entry carries a "tag" and its candidate "responses".
	with open('intents.json', 'r') as f:
	    intents = json.load(f)

	# Checkpoint holding the model weights plus the vocabulary and tag list.
	# map_location remaps the stored tensors onto the device chosen above, so a
	# checkpoint saved on a GPU machine still loads on a CPU-only one.
	FILE = "data.pth"
	data = torch.load(FILE, map_location=device)

	input_size = data["input_size"]
	hidden_size = data["hidden_size"]
	output_size = data["output_size"]
	all_words = data["all_words"]
	tags = data["tags"]
	model_state = data["model_state"]

	# Rebuild the network with the saved dimensions, restore its weights and
	# switch it to evaluation mode for inference.
	model = NeuralNet(input_size, hidden_size, output_size)
	model.load_state_dict(model_state)
	model.eval()

	bot_name = "Gudeg"

	def get_response(message):
	    """Return the bot's reply to *message*.

	    The message is tokenized, encoded as a bag-of-words vector over
	    ``all_words`` and classified by ``model``.  When the most likely tag has
	    a softmax probability above 0.98, a random response of the intent with
	    that tag is returned; in every other case the fallback sentence is
	    returned, so the function always yields a string.
	    """
	    fallback = "I do not understand..."

	    sentence = tokenize(message)
	    x = bag_of_words(sentence, all_words)
	    x = x.reshape(1, x.shape[0])  # one sample -> batch with a single row
	    x = torch.from_numpy(x)

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        output = model(x)
	        probs = torch.softmax(output, dim=1)

	    # softmax is monotonic, so the most probable class is also the arg-max
	    # of the raw outputs.
	    prob, predicted = torch.max(probs, dim=1)
	    tag = tags[predicted.item()]

	    if prob.item() > 0.98:
	        for intent in intents["intents"]:
	            if tag == intent["tag"]:
	                return random.choice(intent['responses'])

	    # Low confidence, or no intent carries the predicted tag.
	    return fallback

	if __name__=="__main__":
	    # Terminal chat: keep answering until the user types "exit".
	    while True:
	        sentence = input('You: ')
	        if sentence == "exit":
	            break

	        resp=get_response(sentence)
	        print(f"{bot_name}: {resp}")
	IGNORE_CHARACTERS=['?', '!', '.', ',']
	BATCH_SIZE=1
	HIDDEN_SIZE=37
	LEARNING_RATE=0.001
	NUM_EPOCHS=1000
	DATA_PATH="data.pth"
	PORT=8000
	{
	"intents": [
	{
	"tag": "greetings",
	"patterns": [
	"Hi",
	"Hey",
	"How are you",
	"Is anyone there",
	"Hello",
	"What's up"
	],
	"responses": [
	"Hey",
	"Hello, thanks for visiting",
	"Hi there, what can I do for you",
	"Hi there, how can I help"
	]
	},
	{
	"tag": "products",
	"patterns": [
	"What food do you have",
	"What can I eat",
	"What do you sell",
	"What are your products",
	"What can I buy",
	"Tell me about your menu",
	"Are there any food there"
	],
	"responses": [
	"We sell javanese traditional cuisines",
	"At Gudeg Martinah, we specialize in Javanese traditional cuisines.",
	"Gudeg, Krecek, Ayam Opor, Telur Bebek, are the main dishes that we have.",
	"Our menu features delicious dishes such as Gudeg, Krecek, Ayam Opor, and Telur Bebek.",
	"For a delightful experience, try our signature dish, Gudeg, or explore other favorites like Krecek and Ayam Opor."
	]
	},
	{
	"tag": "opening_hours",
	"patterns": [
	"When are you open",
	"What are your opening hours",
	"Tell me your business hours",
	"When do you open and close",
	"Tell me when I can visit",
	"What time do you start serving",
	"When can I eat there"
	],
	"responses": [
	"We open our doors at 10:00 AM and close at 9:00 PM every day. Feel free to drop by during these hours for a delightful dining experience.",
	"Our regular opening hours are from 10:00 AM to 9:00 PM, seven days a week.",
	"Our team is ready to serve you from 10:00 AM to 9:00 PM throughout the week. Visit us whenever it suits your schedule!"
	]
	}
	]
	}
	import torch.nn as nn

	class NeuralNet(nn.Module):
	    """Feed-forward classifier: two ReLU hidden layers and a linear output.

	    Args:
	        input_size: number of features in each input row.
	        hidden_size: width of both hidden layers.
	        output_size: number of output classes (raw scores are returned).
	    """

	    def __init__(self, input_size, hidden_size, output_size):
	        super(NeuralNet, self).__init__()
	        self.l1 = nn.Linear(input_size, hidden_size)
	        self.l2 = nn.Linear(hidden_size, hidden_size)
	        self.l3 = nn.Linear(hidden_size, output_size)
	        self.relu = nn.ReLU()

	    def forward(self, x):
	        """Return the raw class scores for a batch ``x``.

	        No softmax is applied here; callers apply it (or a loss that
	        includes it) themselves.
	        """
	        # Each layer must consume the previous layer's activation.  Feeding
	        # ``x`` into l2/l3 skipped the hidden layers entirely and only ran
	        # at all when hidden_size happened to equal input_size.
	        # NOTE: checkpoints trained with the old forward pass keep the same
	        # parameter names and shapes but must be retrained.
	        out = self.relu(self.l1(x))
	        out = self.relu(self.l2(out))
	        out = self.l3(out)
	        return out
	import nltk
	import numpy as np
	from nltk.stem.porter import PorterStemmer

	# Single PorterStemmer instance shared by stem() below.
	stemmer = PorterStemmer()

	def tokenize(sentence):
	    """Split *sentence* into tokens with ``nltk.word_tokenize``."""
	    return nltk.word_tokenize(sentence)

	def stem(word):
	    """Lower-case *word* and return its Porter stem."""
	    return stemmer.stem(word.lower())

	def bag_of_words(tokenized_sentence, all_words):
	    """Encode a tokenized sentence as a binary bag-of-words vector.

	    Args:
	        tokenized_sentence: iterable of raw word tokens; each one is stemmed
	            here before the lookup.
	        all_words: sequence of vocabulary words; it fixes the length and the
	            ordering of the result.

	    Returns:
	        A ``float32`` NumPy array with ``len(all_words)`` entries: 1.0 where
	        the vocabulary word occurs among the stemmed tokens, 0.0 elsewhere.
	    """
	    # A set gives O(1) membership tests instead of rescanning the token
	    # list once per vocabulary word.
	    present = {stem(w) for w in tokenized_sentence}

	    bag = np.zeros(len(all_words), dtype=np.float32)
	    for idx, w in enumerate(all_words):
	        if w in present:
	            bag[idx] = 1.0
	    return bag
	blinker==1.6.2
	click==8.1.4
	filelock==3.12.2
	Flask==2.3.2
	Flask-Cors==4.0.0
	itsdangerous==2.1.2
	Jinja2==3.1.2
	joblib==1.3.1
	MarkupSafe==2.1.3
	mpmath==1.3.0
	networkx==3.1
	nltk==3.8.1
	numpy==1.25.1
	python-dotenv==1.0.0
	regex==2023.6.3
	sympy==1.12
	torch==2.0.1
	tqdm==4.65.0
	typing_extensions==4.7.1
	Werkzeug==2.3.6
	import json
	import os
	from nltk_utils import tokenize, stem, bag_of_words
	import numpy as np
	from dotenv import load_dotenv
	load_dotenv()

	import torch
	import torch.nn as nn
	from torch.utils.data import Dataset, DataLoader

	from model import NeuralNet


	import ast

	with open('intents.json', 'r') as f:
	    intents = json.load(f)

	# Walk every intent: remember its tag, tokenize each example pattern, and
	# keep both the flat token list (vocabulary) and the (tokens, tag) pairs.
	all_words = []
	tags = []
	xy = []
	for intent in intents['intents']:
	    tag = intent['tag']
	    tags.append(tag)
	    for pattern in intent['patterns']:
	        w = tokenize(pattern)
	        all_words.extend(w)
	        xy.append((w, tag))

	# IGNORE_CHARACTERS arrives from the environment as text such as
	# "['?', '!', '.', ',']".  Parse it into a real set so the filter below is
	# an exact token match (not a substring test against that text), and fall
	# back to the same four characters when the variable is unset.
	_raw_ignore = os.getenv('IGNORE_CHARACTERS')
	if not _raw_ignore:
	    ignore_characters = {'?', '!', '.', ','}
	else:
	    try:
	        ignore_characters = set(ast.literal_eval(_raw_ignore))
	    except (ValueError, SyntaxError, TypeError):
	        # Not a Python list literal (e.g. a bare "?!.,"): ignore each
	        # non-blank character individually.
	        ignore_characters = {ch for ch in _raw_ignore if not ch.isspace()}

	# Vocabulary: unique stems of all kept tokens, sorted for a stable order.
	all_words = sorted({stem(w) for w in all_words if w not in ignore_characters})
	tags = sorted(set(tags))

	# One bag-of-words row per pattern and the index of its tag as the label.
	x_train = []
	y_train = []
	for (pattern_sentence, tag) in xy:
	    bag = bag_of_words(pattern_sentence, all_words)
	    x_train.append(bag)

	    label = tags.index(tag)
	    y_train.append(label)

	x_train = np.array(x_train)
	y_train = np.array(y_train)

	class ChatDataset(Dataset):
	    """Dataset over the module-level ``x_train`` / ``y_train`` arrays."""

	    def __init__(self):
	        self.n_samples = len(x_train)
	        self.x_data = x_train
	        self.y_data = y_train

	    def __getitem__(self, index):
	        return self.x_data[index], self.y_data[index]

	    def __len__(self):
	        return self.n_samples


	# Hyper-parameters are read from the environment.  The fallbacks mirror
	# example.env, so a missing variable no longer hands None to int()/float().
	batch_size = int(os.getenv('BATCH_SIZE', '1'))
	hidden_size = int(os.getenv('HIDDEN_SIZE', '37'))
	output_size = len(tags)
	input_size = len(x_train[0])
	learning_rate = float(os.getenv('LEARNING_RATE', '0.001'))
	num_epochs = int(os.getenv('NUM_EPOCHS', '1000'))
	dataset = ChatDataset()

	train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)

	# Train on the GPU when CUDA is available, otherwise on the CPU.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	model = NeuralNet(input_size, hidden_size, output_size).to(device)

	# loss and optimizer
	criterion = nn.CrossEntropyLoss()
	optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

	# Run num_epochs passes over the training data.
	for epoch in range(num_epochs):
	    for (words, labels) in train_loader:
	        # Move this mini-batch onto the training device.
	        words = words.to(device)
	        labels = labels.to(device)

	        # forward pass
	        outputs = model(words)
	        loss = criterion(outputs, labels.long())

	        # backward pass and parameter update
	        optimizer.zero_grad()
	        loss.backward()
	        optimizer.step()

	    # Report the last mini-batch loss on every 100th epoch.
	    if (epoch+1) % 100 == 0:
	        print(f'epoch {epoch+1}/{num_epochs}, loss={loss.item():.4f}')

	print(f'final loss, loss={loss.item():.4f}')

	# Everything chat.py needs to rebuild the model and encode new sentences.
	data = {
	    "model_state": model.state_dict(),
	    "input_size": input_size,
	    "output_size": output_size,
	    "hidden_size": hidden_size,
	    "all_words": all_words,
	    "tags": tags,
	}

	# Fall back to the path chat.py loads from when DATA_PATH is not set,
	# instead of passing None to torch.save().
	FILE = os.getenv('DATA_PATH', 'data.pth')
	print(f'saving data to {FILE}')
	torch.save(data, FILE)

	print(f'training complete. file saved to {FILE}')