This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import pandas as pd | |
import pymongo | |
from datetime import datetime | |
def args(): | |
parser = argparse.ArgumentParser() | |
parser.add_argument("path", help="path to ebay index html") | |
parser.add_argument("mongodb",help="ip to mongodb",default="mongodb://localhost:27017/") | |
parser.add_argument("mongodb_prname",help="monogdb project name",default="shelock") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pysolr | |
from xml.etree import ElementTree | |
import argparse | |
# Field names that are ignored (skipped) when processing a product record.
skip = {
    "shippingDetails",
    "priceDetail",
    "dailyOfferCategory",
    "stockInfo_availability",
    "abroad",
    "imageDetail",
    "bigImageUrl",
    "specialOffer",
    "editorsChoice",
    "shippingDate",
    "shippingTime",
    "types",
    "stockInfo",
    "hasVariant",
    "member",
    "images",
    "shippingFee",
    "image",
    "globalTradeItemNumber",
    "quantity",
    "soldQuantity",
    "variants",
    "subTitle",
    "windowOptionFlag",
    "productLine",
    "affiliate",
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(text):
    """Return the label names predicted for a single piece of text.

    The text is echoed to stdout, vectorized with the module-level
    ``vectorizer`` and classified with the module-level ``lp_classifier``.

    Args:
        text: The raw input string to classify.

    Returns:
        A list of ``data.columns`` entries whose position in the prediction
        row is set to 1.
    """
    print("text -> ",text)
    # Vectorize the argument itself; wrap it in a list because transform()
    # expects an iterable of documents.
    features = vectorizer.transform([text])
    predicted = lp_classifier.predict(features).toarray()
    # NOTE(review): indices are looked up in data.columns, which assumes the
    # label columns come first in `data` in training order - confirm.
    return [data.columns[i] for i, flag in enumerate(predicted[0]) if flag == 1]
# Sample Arabic review ("The workers in the workshop were kind and their work
# was very good") run through the classifier.
text="كان العمال في الورشة لطفاء وكان عملهن جيد جداً"
predict(text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Target columns of the multi-label task (Arabic column names:
# quality, behaviour, review, usage, price).
labels = [
    "الجودة",
    "السلوك",
    "مراجعة",
    "الاستخدام",
    "السعر",
]
# Select the label columns of each split as the classification targets.
y_train, y_test = train[labels], test[labels]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split | |
# Shuffled 70/30 split with a fixed seed so the partition is reproducible.
train, test = train_test_split(data, test_size=0.30, shuffle=True, random_state=42)
# Pull the text column of each split out as a NumPy unicode array.
train_text, test_text = (
    split['النص'].values.astype('U') for split in (train, test)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk import word_tokenize | |
from nltk.tokenize import RegexpTokenizer | |
#!pip install TurkishStemmer | |
from TurkishStemmer import TurkishStemmer | |
stemmer = TurkishStemmer()
# Tokenizer that keeps only runs of word characters, dropping punctuation.
tokenizer = RegexpTokenizer(r'\w+')


def punct_re(x):
    """Lower-case *x*, drop punctuation, and re-join the tokens with spaces.

    Args:
        x: The input string.

    Returns:
        The word-character tokens of the lower-cased input, space-separated.
    """
    return " ".join(tokenizer.tokenize(x.lower()))
def stemmer_char(text, i):
    """Truncate every token of *text* to its first *i* characters.

    Tokens are produced by ``word_tokenize``; the clipped tokens are joined
    back together with single spaces.
    """
    clipped = []
    for token in word_tokenize(text):
        clipped.append(token[:i])
    return " ".join(clipped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split | |
# Shuffled 70/30 split with a fixed seed so the partition is reproducible.
train, test = train_test_split(data, test_size=0.30, shuffle=True, random_state=42)
# Pull the 'TurkishStemmer' column of each split out as a NumPy unicode array.
train_text, test_text = (
    split['TurkishStemmer'].values.astype('U') for split in (train, test)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import TfidfVectorizer | |
# Word-level TF-IDF over uni-, bi- and tri-grams, capped at 10,000 features.
vectorizer = TfidfVectorizer(
    strip_accents='unicode',
    analyzer='word',
    ngram_range=(1, 3),
    norm='l2',
    max_features=10000,
)
# Fit on the training split only: fitting again on the test texts would
# replace the learned vocabulary/IDF weights and leak test data into the
# features used for training.
X_train = vectorizer.fit_transform(train_text)
# The held-out texts are only projected onto the training vocabulary.
x_test = vectorizer.transform(test_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Target columns of the multi-label task (Turkish column names:
# price, feedback, usage, attitude and behaviour, quality).
labels = [
    "Fiyat",
    "Geri Bildirim",
    "Kullanım",
    "Tavır ve davranış",
    "Kalite",
]
# Select the label columns of each split as the classification targets.
y_train, y_test = train[labels], test[labels]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install scikit-multilearn | |
!pip install scikit-learn | |
from sklearn.linear_model import LogisticRegression | |
from skmultilearn.problem_transform import LabelPowerset | |
from sklearn.metrics import f1_score,accuracy_score | |
# Label-powerset wrapper around a logistic-regression base estimator.
base_estimator = LogisticRegression()
lp_classifier = LabelPowerset(base_estimator)
lp_classifier.fit(X_train, y_train)

# Score the held-out split.
lp_predictions = lp_classifier.predict(x_test)
accuracy = accuracy_score(y_test, lp_predictions)
print("Accuracy = ", accuracy)
micro_f1 = f1_score(y_test, lp_predictions, average="micro")
print("F1 score = ", micro_f1)
NewerOlder