köpek midir? | hangi etiket? | hangi etiketler? |
---|---|---|
Evet/Hayır — ikili | köpek/doğa/çimen — çok sınıflı | köpek, doğa, çimen — çoklu etiket |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#fname: the file name of binary file <google_w2v.bin> | |
#vocab: vocabulary dictionary | |
function load_bin_vec(fname, vocab) | |
pc(s)=return convert(Char,s[1]) | |
word_vecs = Dict() | |
open(fname, "r") do f | |
@show header = readline(f) | |
vocab_size, layer1_size = map(pf, split(header)) | |
@show binary_len = sizeof(Float32) * layer1_size |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests \n", | |
"from bs4 import BeautifulSoup\n", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#this script was written by Cigil and edited by aalabrash18@ku.edu.tr
import pandas as pd | |
from lxml import etree | |
import re | |
import os, sys, io, traceback, codecs | |
from bs4 import BeautifulSoup, Comment | |
UNESCAPE = True | |
import html as h | |
import glob |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(text):
    """Predict the labels of a single raw text.

    The text is split on whitespace, every token is stemmed with the
    module-level ``stemmer``, and the stems are re-joined into one
    space-separated string before vectorisation. The vectorizer works on
    strings, so passing the list of stems directly (as the previous version
    did) fails when the vectorizer tries to lower-case a list.

    Args:
        text: Raw input sentence.

    Returns:
        The entries of ``data.columns`` whose predicted indicator equals 1.
    """
    print("text -> ",text)
    text_s = [stemmer.stem(w) for w in text.split()]
    print("text stemmed-> ",text_s)
    # One document == one string; wrap it in a list because transform()
    # expects an iterable of documents.
    X = vectorizer.transform([" ".join(text_s)])
    X_pred = lp_classifier.predict(X).toarray()
    # NOTE(review): the indices are positions in the label matrix the
    # classifier was fitted on; this assumes data.columns is ordered the same
    # way as those label columns — confirm against the training cell.
    return [data.columns[i] for i, x in enumerate(X_pred[0]) if x == 1]


text="araçım servise son getirmediğimde düzgün tamir edilmedi memnun değil"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
!pip install scikit-multilearn | |
!pip install scikit-learn | |
from sklearn.linear_model import LogisticRegression | |
from skmultilearn.problem_transform import LabelPowerset | |
from sklearn.metrics import f1_score,accuracy_score | |
lp_classifier = LabelPowerset(LogisticRegression()) | |
lp_classifier.fit(X_train, y_train) | |
lp_predictions = lp_classifier.predict(x_test) | |
print("Accuracy = ",accuracy_score(y_test,lp_predictions)) | |
print("F1 score = ",f1_score(y_test,lp_predictions, average="micro")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Names of the target columns; the same list selects the targets from both
# the train and the test frame so the two label matrices share column order.
labels=["Fiyat","Geri Bildirim","Kullanım","Tavır ve davranış","Kalite"]
y_train=train[labels]
y_test=test[labels]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import TfidfVectorizer

# Word-level TF-IDF over 1- to 3-grams, capped at 10000 features.
vectorizer = TfidfVectorizer(strip_accents='unicode', analyzer='word', ngram_range=(1,3), norm='l2', max_features = 10000)

# Fit on the training text ONLY. A second fit() on the test text would
# discard the training vocabulary/IDF weights and derive the features from
# the evaluation data instead.
X_train = vectorizer.fit_transform(train_text)
# The test split is only transformed with the vocabulary learned above.
x_test = vectorizer.transform(test_text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split

# Shuffled 70/30 split of `data`; the fixed random_state keeps it reproducible.
train, test = train_test_split(data, random_state=42, test_size=0.30, shuffle=True)

# Text of the 'TurkishStemmer' column, cast to unicode strings.
train_text = train['TurkishStemmer'].values.astype('U')
test_text = test['TurkishStemmer'].values.astype('U')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk import word_tokenize
from nltk.tokenize import RegexpTokenizer
#!pip install TurkishStemmer
from TurkishStemmer import TurkishStemmer

# Shared stemmer and a tokenizer that keeps only runs of word characters.
stemmer = TurkishStemmer()
tokenizer = RegexpTokenizer(r'\w+')


def punct_re(x):
    """Lower-case *x* and re-join its word tokens with single spaces.

    Anything the module-level ``tokenizer`` pattern does not match
    (punctuation, extra whitespace) is dropped.
    """
    return " ".join(tokenizer.tokenize(x.lower()))
def stemmer_char(text,i):
    """Truncate every token of *text* to its first *i* characters.

    Args:
        text: Input string; tokenised with ``word_tokenize``.
        i: Maximum number of leading characters kept per token.

    Returns:
        The truncated tokens joined by single spaces.
    """
    return " ".join(word[:i] for word in word_tokenize(text))
OlderNewer