This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_extraction.text import CountVectorizer | |
class ProjectionCountVectorizer(CountVectorizer): | |
def __init__(self, projection_path, *args, **kwargs):
    """Store the projection path as a list of segments, then run the parent initialiser.

    Args:
        projection_path: Either a '/'-separated string, which is split on
            '/', or an iterable of already-split segments, which is copied
            into a new list.
        *args: Forwarded unchanged to the parent class initialiser.
        **kwargs: Forwarded unchanged to the parent class initialiser.
    """
    # Accept an already-split path as well as a string, so the value stored
    # on the instance can be passed back into the constructor without
    # failing on a missing ``.split`` method.
    if isinstance(projection_path, str):
        self.projection_path = projection_path.split('/')
    else:
        self.projection_path = list(projection_path)
    super().__init__(*args, **kwargs)
def build_preprocessor(self): | |
built = super().build_preprocessor() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class OneHotTransformer: | |
def __init__(self, func): | |
self.f = func | |
def fit(self, X, y=None): | |
unseen = object() | |
seen = set() | |
for x in X: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class DirectTransformer: | |
"""Utility for building class-like features from a single-point function, but that may need | |
some general configuration first (you usually override __init__ for that) | |
""" | |
def fit(self, X, y=None):
    """No-op fit: nothing is learned from the data.

    Args:
        X: Ignored.
        y: Ignored; defaults to None.

    Returns:
        The instance itself, unchanged.
    """
    return self