Skip to content

Instantly share code, notes, and snippets.

@prrao87
Last active August 30, 2019 20:56
Show Gist options
  • Save prrao87/5e8fb3b674b95c8f33402268fa0b98c0 to your computer and use it in GitHub Desktop.
Save prrao87/5e8fb3b674b95c8f33402268fa0b98c0 to your computer and use it in GitHub Desktop.
class LogisticRegressionSentiment(Base):
    """Fine-grained sentiment classifier backed by a scikit-learn pipeline.

    The pipeline chains three steps: a ``CountVectorizer`` ('vect'), a
    ``TfidfTransformer`` ('tfidf') and a ``LogisticRegression`` classifier
    ('clf') using the ``liblinear`` solver.
    """

    def __init__(self, model_file: str = None) -> None:
        """Build the unfitted vectorizer -> tf-idf -> classifier pipeline.

        Args:
            model_file: Accepted for signature compatibility; it is not
                referenced anywhere in this constructor.
        """
        super().__init__()
        # scikit-learn is imported lazily so that it is only needed when this
        # particular classifier is instantiated.
        from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
        from sklearn.linear_model import LogisticRegression
        from sklearn.pipeline import Pipeline

        classifier = LogisticRegression(solver='liblinear', multi_class='auto')
        steps = [
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', classifier),
        ]
        self.pipeline = Pipeline(steps)

    def predict(self, train_file: str, test_file: str, lower_case: bool = False) -> pd.DataFrame:
        """Fit the pipeline on the training file, then label the test file.

        Both files are loaded through ``self.read_data`` (defined outside
        this class body -- presumably on ``Base``; confirm there for the
        exact file format and the effect of ``lower_case``).

        Args:
            train_file: Path passed to ``read_data`` for the training set;
                the resulting frame must provide 'text' and 'truth' columns.
            test_file: Path passed to ``read_data`` for the evaluation set;
                the resulting frame must provide a 'text' column.
            lower_case: Forwarded unchanged to ``read_data`` for both files.

        Returns:
            The test DataFrame with an added 'pred' column holding the
            predicted class label for each row.
        """
        # Training phase: fit on the labelled examples.
        training_frame = self.read_data(train_file, lower_case)
        documents = training_frame['text']
        labels = training_frame['truth']
        fitted_model = self.pipeline.fit(documents, labels)

        # Inference phase: the test file is read only after fitting.
        evaluation_frame = self.read_data(test_file, lower_case)
        predictions = fitted_model.predict(evaluation_frame['text'])
        evaluation_frame['pred'] = predictions
        return evaluation_frame
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment