This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: '3' | |
services: | |
preprocessor: | |
image: some_image | |
environment: | |
- PYTHONPATH=. | |
command: some_command | |
networks: | |
default: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```
usage
-------
>>> class MyModel(LogisticRegression, ROCCurveSupportMixin):
...     pass
...
>>> model = MyModel(C=3.)
>>> model.fit(X_train, y_train)
>>> model.plot_roc_curves(X_test, y_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
``` | |
example of usage | |
>>> model = gensim.models.Doc2Vec(min_count=1, vector_size=300) | |
>>> trained_model = gensim.models.KeyedVectors.load_word2vec_format('model.vec', binary=False) | |
>>> model.build_vocab(corpus) | |
>>> model = load_pretrained_word_vectors(model, trained_model) | |
>>> model.train(**params) | |
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.mixture import GaussianMixture | |
class SCDV(object):
    """Bundle a word-embedding model with a TF-IDF vectorizer and a GMM.

    NOTE(review): the name presumably stands for "Sparse Composite Document
    Vectors" -- confirm against the rest of the class, which is not shown
    in this excerpt.
    """

    def __init__(self, word_embedding_model):
        """Store the embedding model and create the helper estimators.

        Args:
            word_embedding_model: Embedding model kept as-is on the
                instance; its expected interface is not visible here.
        """
        # Both estimators are created unfitted; the Gaussian mixture uses
        # the library defaults.
        tfidf_vectorizer = TfidfVectorizer(use_idf=True)
        mixture_model = GaussianMixture()

        self._word_embedding_model = word_embedding_model
        self._tfidf = tfidf_vectorizer
        self._gmm = mixture_model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import CaboCha | |
class CaboChaGenerator(object): | |
def __init__(self):
    """Create the CaboCha parser instance used by this generator."""
    cabocha_parser = CaboCha.Parser()
    self.c = cabocha_parser
def parse_to_chunk(self, text): | |
tree = self.c.parse(text) | |
for i in range(tree.chunk_size()): | |
chunk = tree.chunk(i) | |
chunk_tokens = [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class PMIVectorizer(CountVectorizer):
    """Count vectorizer whose fitted output is post-processed by ``_calc``.

    NOTE(review): ``_calc`` is not part of this excerpt; judging by the
    class name it presumably converts raw counts into PMI scores -- confirm.
    """

    def _preprocess(self, x):
        """Return ``x`` unchanged.

        Acts as a pass-through step applied before vectorization; it looks
        intended as an override point for subclasses.
        """
        return x

    def fit_transform(self, x, y=None):
        """Fit the vocabulary on ``x`` and return the ``_calc``-ed matrix.

        Args:
            x: Raw documents, passed through ``_preprocess`` first.
            y: Accepted for API compatibility; it is not forwarded to the
                parent ``fit_transform``.

        Returns:
            Whatever ``self._calc`` produces from the parent's
            ``fit_transform`` output.
        """
        prepared_docs = self._preprocess(x)
        count_matrix = super().fit_transform(prepared_docs)
        return self._calc(count_matrix)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import MeCab | |
class MeCabGenerator(object): | |
def __init__(self, dict_path='/usr/local/lib/mecab/dic/mecab-ipadic-neologd'):
    """Create the MeCab tagger.

    Args:
        dict_path: Dictionary directory handed to MeCab through its
            ``-d`` option.
    """
    tagger_options = '-d {}'.format(dict_path)
    self.mecab = MeCab.Tagger(tagger_options)
def parse(self, text):
    """Parse ``text`` by delegating to ``self._generator_wrapper``.

    Returns:
        The object returned by ``_generator_wrapper`` for ``text``.
    """
    parsed = self._generator_wrapper(text)
    return parsed
def get_morphemes(self, text, surface_filter=lambda x: x): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
from keras.models import Model, Sequential | |
from keras.regularizers import l2 | |
from logging import getLogger, StreamHandler, Formatter, INFO | |
from typing import Union, Optional | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
from io import BytesIO | |
import cv2 | |
from fnmatch import fnmatch | |
from google.cloud.storage import Client, Blob | |
import numpy as np | |
import pandas as pd | |
import os.path | |
from pathlib import Path |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import chainer.functions as F | |
try:
    from chainer.backend import get_array_module
except ImportError:
    # `chainer.backend.get_array_module` is unavailable (the original note
    # here read "if you don't use cuda"): fall back to a NumPy-only
    # stand-in. Only ImportError is caught so that unrelated failures
    # (KeyboardInterrupt, SystemExit, genuine bugs) are not swallowed.
    import numpy as np

    def get_array_module(*args):
        """Return the NumPy module regardless of the arguments given.

        Accepts any number of positional arguments so it can be called the
        same way as Chainer's own ``get_array_module``.
        """
        return np
def focal_loss(y_pred, y_true, scale=2): | |
xp = get_array_module(y_pred) | |