Skip to content

Instantly share code, notes, and snippets.

View alabrashJr's full-sized avatar

Abdurrahman Beyaz alabrashJr

View GitHub Profile
import argparse
import pandas as pd
import pymongo
from datetime import datetime
def args():
    """Build the command-line parser for the eBay index import script.

    Registers three positional arguments:

    * ``path`` -- required, path to the eBay index HTML file.
    * ``mongodb`` -- optional, MongoDB connection string.
    * ``mongodb_prname`` -- optional, MongoDB project name.

    ``mongodb`` and ``mongodb_prname`` use ``nargs="?"`` because argparse
    ignores ``default`` on a positional argument otherwise, which would make
    both of them mandatory despite the declared defaults.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path", help="path to ebay index html")
    parser.add_argument(
        "mongodb",
        nargs="?",
        help="ip to mongodb",
        default="mongodb://localhost:27017/",
    )
    parser.add_argument(
        "mongodb_prname",
        nargs="?",
        help="monogdb project name",
        default="shelock",
    )
    # NOTE(review): the visible snippet stops here without parsing or
    # returning anything -- presumably the rest of the function was cut off;
    # confirm against the full source.
import pysolr
from xml.etree import ElementTree
import argparse
# Field names collected in a set for fast membership tests.
# NOTE(review): presumably these are product fields to leave out when
# indexing -- the consumer of `skip` is not visible here; confirm.
skip = {
    "shippingDetails",
    "priceDetail",
    "dailyOfferCategory",
    "stockInfo_availability",
    "abroad",
    "imageDetail",
    "bigImageUrl",
    "specialOffer",
    "editorsChoice",
    "shippingDate",
    "shippingTime",
    "types",
    "stockInfo",
    "hasVariant",
    "member",
    "images",
    "shippingFee",
    "image",
    "globalTradeItemNumber",
    "quantity",
    "soldQuantity",
    "variants",
    "subTitle",
    "windowOptionFlag",
    "productLine",
    "affiliate",
}
def predict(text):
    """Return the label names predicted for a single piece of text.

    The text is echoed to stdout, vectorized with the module-level
    ``vectorizer``, and classified with the module-level ``lp_classifier``.
    Every position of the first prediction row that equals 1 is mapped to
    the column name at the same position in ``data.columns``.

    Args:
        text: The raw text to classify.

    Returns:
        A list of entries from ``data.columns`` for the positions predicted
        as 1.
    """
    print("text -> ",text)
    # Vectorize the argument itself; the earlier code referenced a name
    # (`text_s`) that is not defined in this function, so the parameter was
    # never used for the prediction.
    features = vectorizer.transform([text])
    predicted = lp_classifier.predict(features).toarray()
    # NOTE(review): positions are looked up in `data.columns`; this assumes
    # the label columns sit at the same positions there as in the
    # classifier output -- confirm.
    return [data.columns[i] for i, flag in enumerate(predicted[0]) if flag == 1]
# Sample sentence used to try out the classifier.
text="كان العمال في الورشة لطفاء وكان عملهن جيد جداً"
# Pass the sentence assigned above; `test_txt` is not defined in this snippet.
predict(text)
# Names of the target label columns.
labels = [
    "الجودة",
    "السلوك",
    "مراجعة",
    "الاستخدام",
    "السعر",
]

# Select the label columns from each split.
y_train, y_test = train[labels], test[labels]
from sklearn.model_selection import train_test_split
# Shuffled 70/30 split with a fixed seed so it is reproducible.
train, test = train_test_split(
    data,
    test_size=0.30,
    shuffle=True,
    random_state=42,
)

# Pull the text column out of each split as a unicode string array.
train_text, test_text = (
    part['النص'].values.astype('U') for part in (train, test)
)
from nltk import word_tokenize
from nltk.tokenize import RegexpTokenizer
#!pip install TurkishStemmer
from TurkishStemmer import TurkishStemmer
stemmer = TurkishStemmer()

# Tokens are runs of word characters, so punctuation is dropped.
tokenizer = RegexpTokenizer(r'\w+')


def punct_re(x):
    """Lower-case *x*, tokenize it, and join the tokens with single spaces."""
    lowered = x.lower()
    return " ".join(tokenizer.tokenize(lowered))
def stemmer_char(text, i):
    """Cut every token of *text* down to its first *i* characters.

    The text is split with ``word_tokenize`` and the shortened tokens are
    joined back together with single spaces.
    """
    shortened = (token[:i] for token in word_tokenize(text))
    return " ".join(shortened)
from sklearn.model_selection import train_test_split
# Shuffled 70/30 split with a fixed seed so it is reproducible.
train, test = train_test_split(
    data,
    test_size=0.30,
    shuffle=True,
    random_state=42,
)

# Pull the 'TurkishStemmer' column out of each split as a unicode string array.
train_text, test_text = (
    part['TurkishStemmer'].values.astype('U') for part in (train, test)
)
from sklearn.feature_extraction.text import TfidfVectorizer
# Word-level TF-IDF over 1- to 3-grams, capped at 10,000 features.
vectorizer = TfidfVectorizer(strip_accents='unicode', analyzer='word', ngram_range=(1,3), norm='l2', max_features = 10000)

# Fit on the training text only. A second fit on the test text would
# discard this fit (each call to fit() relearns vocabulary and IDF from
# scratch) and leak test-set statistics into the features.
vectorizer.fit(train_text)

# Project both splits into the feature space learned from the training text.
X_train = vectorizer.transform(train_text)
x_test = vectorizer.transform(test_text)
# Names of the target label columns.
labels = [
    "Fiyat",
    "Geri Bildirim",
    "Kullanım",
    "Tavır ve davranış",
    "Kalite",
]

# Select the label columns from each split.
y_train, y_test = train[labels], test[labels]
# Notebook shell commands, kept as comments so the file parses as Python:
#!pip install scikit-multilearn
#!pip install scikit-learn
from sklearn.linear_model import LogisticRegression
from skmultilearn.problem_transform import LabelPowerset
from sklearn.metrics import f1_score,accuracy_score
# Label Powerset wrapper around a logistic-regression base classifier.
lp_classifier = LabelPowerset(classifier=LogisticRegression())
lp_classifier.fit(X_train, y_train)

# Predict on the held-out features, then report accuracy and micro-F1.
lp_predictions = lp_classifier.predict(x_test)
print(
    "Accuracy = ",
    accuracy_score(y_test, lp_predictions),
)
print(
    "F1 score = ",
    f1_score(y_test, lp_predictions, average="micro"),
)