Skip to content

Instantly share code, notes, and snippets.

View Ab1992ao's full-sized avatar

Alexander Abramov Ab1992ao

View GitHub Profile
@Ab1992ao
Ab1992ao / fit.py
Created February 26, 2019 08:18 — forked from MaxHalford/fit.py
Keras fit/predict scikit-learn pipeline
import os
from keras import backend as K
from keras import callbacks
from keras import layers
from keras import models
from keras.wrappers.scikit_learn import KerasClassifier
import pandas as pd
import tensorflow as tf
from sklearn import metrics
@Ab1992ao
Ab1992ao / Loss.py
Created September 6, 2019 06:25 — forked from snakers4/Loss.py
Multi class classification focal loss
import torch
import torch.nn as nn
import torch.nn.functional as F
# Focal loss implementation inspired by
# https://github.com/c0nn3r/RetinaNet/blob/master/focal_loss.py
# https://github.com/doiken23/pytorch_toolbox/blob/master/focalloss2d.py
class MultiClassBCELoss(nn.Module):
def __init__(self,
use_weight_mask=False,
@Ab1992ao
Ab1992ao / load_toxic_data.py
Created May 17, 2021 08:40
prepare toxic data for multitask learning
def load_toxic_data(tox_path):
    """Load a toxicity CSV and return its texts with binary toxic labels.

    The CSV must contain a ``text`` column and a ``sentiment`` column.

    Args:
        tox_path: Location of the CSV file; passed straight to
            ``pandas.read_csv``.

    Returns:
        tuple: ``(toxic_text, toxic_labels)`` where ``toxic_text`` holds the
        whitespace-stripped texts and ``toxic_labels`` holds ``0`` for rows
        whose sentiment is ``'positive'`` or ``'neutral'`` and ``1`` for
        every other sentiment value.
    """
    non_toxic = ('positive', 'neutral')

    tox = pd.read_csv(tox_path)

    # str() first so that non-string cells (e.g. NaN from an empty field)
    # cannot make the strip fail; they become their string representation.
    texts = tox['text'].map(lambda cell: str(cell).strip())

    # toxic = 1, everything listed in `non_toxic` = 0
    labels = (~tox['sentiment'].isin(non_toxic)).astype('int64')

    return texts.values, labels.values
@Ab1992ao
Ab1992ao / load_ner_data.py
Created May 17, 2021 09:07
prepare ner data for multitask learning pipe
def load_ner_data(ner_path, seq_len=24):
data = pd.read_csv(ner_path, encoding= 'unicode_escape', sep=',')
data = data.fillna(method='ffill')
grouped_s = data.groupby('Sentence #', as_index=True)['Word'].apply(lambda g: ' '.join(g))
grouped_t = data.groupby('Sentence #', as_index=True)['Tag'].apply(lambda g: ' '.join(g))
ner_tr = pd.DataFrame({}, columns=['sentence', 'tag'] )
ner_tr['sentence'] = [st for st in grouped_s.values if len(st.split())<=seq_len]
ner_tr['tag'] = [ tg.split() for tg in grouped_t if len(tg.split())<=seq_len]
@Ab1992ao
Ab1992ao / triplet_generator.py
Created May 17, 2021 09:16
generate triplet data for multitask learning pipe
class TripletGenerator:
def __init__(self, datadict, hard_frac = 0.5, batch_size=256):
self.datadict = datadict
self._anchor_idx = np.array(list(self.datadict.keys()))
self._hard_frac = hard_frac
self.generator = self.generate_batch(batch_size)
def generate_batch(self, size):
while True:
@Ab1992ao
Ab1992ao / multiclass_generator.py
Created May 17, 2021 09:18
generate clf data for multitask pipe
class MulticlassGenerator:
def __init__(self, data_tuple, batch_size=256):
self._data = data_tuple
self._idx = np.arange(len(data_tuple[-1]))
self.generator = self.generate_batch(batch_size)
def generate_batch(self, size):
while True:
px_ids = np.random.choice(self._idx, size, replace=False)
samples = [p[px_ids] for p in self._data[:-1]]
@Ab1992ao
Ab1992ao / multitask_data_generator.py
Created May 17, 2021 09:23
complex data generator for multitask pipe
class MultitaskDataGenerator:
    """Expose a group of generators as one endless stream of batches.

    Each batch is whatever ``self.__next__()`` returns. That method is not
    part of this excerpt, so how the wrapped generators are combined is
    defined elsewhere (NOTE(review): confirm against the full class).
    """

    def __init__(self, generators):
        """Keep the wrapped generators and create the batch stream.

        Args:
            generators: The generator objects to wrap; stored unchanged on
                ``self.generators``.
        """
        self.generators = generators
        # Created once here; callers can pull batches via `self.generator`.
        self.generator = self.generate_batch()

    def generate_batch(self, batch_size=None):
        """Yield batches forever by delegating to ``self.__next__()``.

        Args:
            batch_size: Accepted but not used by this method.

        Yields:
            The value returned by each call to ``self.__next__()``.
        """
        while True:
            yield self.__next__()
@Ab1992ao
Ab1992ao / joint_loss.py
Last active June 2, 2021 22:12
Aggregated (joint) loss for the multitask pipeline
def softmax_loss(vectors):
anc, pos, neg = vectors
c = 0.5
anc = c * anc
pos = c * pos
neg = c * neg
pos_sim = tf.reduce_sum((anc * pos), axis=-1, keepdims=True)
@Ab1992ao
Ab1992ao / build_mltsk_model.py
Last active June 3, 2021 09:06
Build the multitask neural-network model
class SBERT:
def __init__(self, config):
self.loss = 0
self.metrics = []
self.inputs = []
self.config = config
self.build()
def build(self):
@Ab1992ao
Ab1992ao / toxic_task_callback.py
Created May 17, 2021 10:08
Evaluate the toxic-task head within the multitask pipeline
class AucCallback(Callback):
    """Callback that holds the state needed to track a named score.

    Only the constructor is visible in this excerpt; the evaluation hooks
    are defined elsewhere.
    """

    def __init__(self, dataset, call_model=None, savepath=None, name="AUC"):
        """Store the callback configuration and reset the best value.

        Args:
            dataset: Stored on ``self.dataset``. Presumably the data the
                score is computed on — TODO confirm against the hooks.
            call_model: Optional; stored on ``self.call_model``.
            savepath: Optional; stored on ``self.savepath``.
            name: Label stored on ``self.name``; defaults to ``"AUC"``.
        """
        self.call_model = call_model
        self.dataset = dataset
        # Starts at 0; presumably the best score seen so far — TODO confirm.
        self.best = 0
        self.name = name
        self.savepath = savepath
        super().__init__()