Skip to content

Instantly share code, notes, and snippets.

@jonasft
Created June 5, 2016 19:31
Show Gist options
  • Save jonasft/d29221ef3c09f720068513c1dae49681 to your computer and use it in GitHub Desktop.
Save jonasft/d29221ef3c09f720068513c1dae49681 to your computer and use it in GitHub Desktop.
import re
import numpy as np
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.preprocessing import normalize
from data import resources
from transformers.tfidf_transformer import TfidfTransformer
class NegTransformer(TransformerMixin, BaseEstimator):
    """Count negation-marked tokens in each document.

    Every document is first run through
    ``TfidfTransformer().process_negation_in_dataset``; each
    whitespace-separated token of the result that ends in ``_NEG`` or
    ``_NEGFIRST`` is then counted. The result is a single-column matrix
    with one row per input document.

    Parameters
    ----------
    norm : bool, default=True
        When true, the count matrix is passed through
        ``sklearn.preprocessing.normalize`` before it is returned.
    """

    # Compiled once here instead of being rebuilt for every token.
    _NEGATED_TOKEN = re.compile(r'.*_NEG(?:FIRST)?$')

    def __init__(self, norm=True):
        # scikit-learn's get_params()/clone()/set_params() look up every
        # constructor argument as an attribute of the same name, so the
        # flag must be stored as `self.norm`.
        self.norm = norm

    @property
    def normalize(self):
        """Backward-compatible alias for the ``norm`` flag."""
        return self.norm

    @normalize.setter
    def normalize(self, value):
        self.norm = value

    def fit(self, X, y=None):
        """Do nothing and return ``self``; this transformer is stateless."""
        return self

    def transform(self, data):
        """Return the negation-count matrix for ``data``.

        Parameters
        ----------
        data : sized collection of documents
            Must support ``len()``; it is handed as-is to
            ``process_negation_in_dataset``.

        Returns
        -------
        numpy.ndarray of shape (len(data), 1)
            Negated-token counts, normalized when ``norm`` is true.
        """
        return self._number_of_negations(data)

    def _number_of_negations(self, data):
        """Build the (len(data), 1) matrix of negated-token counts."""
        counts = np.zeros((len(data), 1))
        # Assumes the helper yields one string per input document, in the
        # same order -- TODO confirm against TfidfTransformer.
        processed = TfidfTransformer().process_negation_in_dataset(data)
        is_negated = self._NEGATED_TOKEN.match
        for row, tweet in enumerate(processed):
            for token in tweet.split():
                if is_negated(token):
                    counts[row] += 1
        if not self.norm:
            return counts
        # NOTE(review): with normalize()'s defaults (L2 norm, applied per
        # row) a single-column matrix maps every non-zero count to 1.0.
        # Behaviour is kept as-is; confirm whether column-wise scaling
        # was intended.
        return normalize(counts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment