# NOTE: This file contains bidirectional Unicode text that may be interpreted
# or compiled differently than what appears below. To review, open the file in
# an editor that reveals hidden Unicode characters.
import pandas as pd | |
import time | |
import io | |
import json | |
from langdetect import detect | |
from snips_nlu import SnipsNLUEngine | |
import time | |
# English (email) model is currently disabled; only the "bn" model is trained.
# with io.open("/home/custom-bot/sms/email_dataset.json") as f:
#     sample_dataset = json.load(f)

# State the encoding explicitly so that loading the dataset does not depend on
# the machine's locale (JSON text is UTF-8 by specification).
with io.open("/home/ehz/Downloads/data_only_bn_new.json", encoding="utf-8") as f:
    sample_dataset_bn = json.load(f)

# nlu_engine = SnipsNLUEngine()
# nlu_engine.fit(sample_dataset)

# Train the engine once at import time; home() reuses it for every message.
nlu_engine_bn = SnipsNLUEngine()
nlu_engine_bn.fit(sample_dataset_bn)

# Canned replies keyed by the intent name returned by the engine.
RESPONSE_BN = {"SMS_balance_Q": "এস এম এস চেক করে কিভাবে"}
# RESPONSE_EN = {"prepaid_SMS_bundle_q": "Dear Customer, please dial *123*2*7*1# to purchase 100 SMS with 3 days validity at BDT5 (including SD, VAT and SC). To check remaining SMS"}
# creating a Flask app | |
#app = Flask(__name__) | |
def isASCII(data):
    """Return True if every character of the string ``data`` is ASCII.

    Args:
        data: The text to inspect (a ``str``).

    Returns:
        True when all characters are in U+0000-U+007F (including the empty
        string), False otherwise.
    """
    # Encoding straight to ASCII also covers strings that cannot be encoded
    # as UTF-8 at all (e.g. lone surrogates): they are reported as non-ASCII
    # instead of raising UnicodeEncodeError.
    try:
        data.encode('ascii')
    except UnicodeEncodeError:
        return False
    return True
#unclean_data = pd.read_csv('/home/custom-bot/sms/wow_call_6months.csv') | |
# print(unclean_data) | |
# print(unclean_data.columns.tolist()) | |
# print(unclean_data['Subject']) | |
# super_clean_data = unclean_data[[' Subject',' Customer-Email']].dropna(axis=0, how='any') | |
#super_clean_data = unclean_data['Customer SMS'].dropna(axis=0, how='any') | |
# print(super_clean_data) | |
# super clean data to text | |
# super_clean_data.to_csv(r'/home/ehz/Downloads/super_clean_data.txt', header=None, index = False, sep='\t', mode='a') | |
#super_clean_data.to_csv(r'/home/custom-bot/sms/wow_call_6months.txt', header=None, index = False, mode='a') | |
# Input messages, one per line; iterated by the processing loop at the bottom
# of the script. The encoding is given explicitly so that reading non-ASCII
# text does not depend on the machine's locale.
f = open('/home/ehz/Downloads/test_data.txt', 'r', encoding='utf-8')
# print(f.readlines()) | |
# count = 0 | |
# | |
# for line in f: | |
# count += 1 | |
# print(base64.b64decode(line)) | |
# print("{}{}".format(count, line.strip())) | |
def home(text, results_path="/home/ehz/results_SMS_results_Q.csv"):
    """Classify one message with the BN engine and record the result.

    ASCII-only messages are not classified (the English model is disabled)
    and get a placeholder response. Any other message is parsed with
    ``nlu_engine_bn``; a ``message|sms|intent|probability`` row is appended
    to ``results_path`` and the canned reply for the intent is returned.

    Args:
        text: The incoming message.
        results_path: File that receives one result row per classified
            message. Defaults to the path the script has always used.

    Returns:
        ``{'data': 'haha'}`` for ASCII-only input, otherwise a dict with a
        ``'text'`` key holding either the canned reply for the detected
        intent or a "no intent found" fallback message.
    """
    data = text
    print(str(data))

    # The language-detection result was never used for routing and the call
    # can raise on text without detectable features, so routing relies only
    # on the ASCII check.
    if isASCII(data):
        # No English handling yet: fall through to the placeholder response.
        return {'data': 'haha'}

    print("Using BN Model.")
    parsing = nlu_engine_bn.parse(str(data))
    print(parsing)
    intent = parsing['intent']['intentName']
    probability = parsing['intent']['probability']
    # Normalise missing values so the row below can always be built.
    if intent is None:
        intent = 'None'
    if probability is None:
        probability = 'None'

    # Open the results file only when there is a row to write, and let the
    # context manager close it even if the write fails.
    with open(results_path, "a", encoding="utf-8") as results:
        results.write(str(data).rstrip() + '|sms|' + intent + '|' + str(probability) + '\n')

    if intent in RESPONSE_BN:
        return {'text': RESPONSE_BN[intent]}
    return {'text': 'kono intent paoya jaini [BN]'}
def isascii(s):
    """Check if the characters in string s are in ASCII, U+0-U+7F.

    Compares code points directly, so strings that cannot be encoded
    (e.g. lone surrogates) are reported as non-ASCII instead of raising.
    """
    return all(ord(ch) < 0x80 for ch in s)
# Run every sufficiently long line of the input file through home().
try:
    for line in f:
        print(line)
        # Only process lines with at least five space-separated tokens.
        words = line.split(' ')
        if len(words) >= 5:
            response = home(line)
            print(response)
            print('\n')
            # NOTE(review): the original indentation was lost; the pause is
            # assumed to apply only after a processed line - confirm.
            time.sleep(1.4)
finally:
    # The module-level input file was never closed; release it even if
    # processing a line fails.
    f.close()
# Gist comment by the author, ehzawad, on Jan 18, 2023.