# gwerbin/lrml_torch.py

## lrml_torch.py
import sys
from time import perf_counter

import numpy as np
import torch
import torch.nn.functional as F
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tqdm import tqdm

# One seeded generator drives every randomized data step below, so both the
# synthetic dataset and the train/test partition are reproducible.
rs = np.random.RandomState(4890702)

# Synthetic 8-class problem: 5000 samples, 200 features (100 informative).
x, y = make_classification(
    n_samples=5000,
    n_classes=8,
    n_features=200,
    n_informative=100,
    random_state=rs,
)

# Features as float32, integer class labels as int64.
x = x.astype(np.float32)
y = y.astype(np.int64)

# Stratified 75/25 split. Reuse the seeded generator: leaving `random_state`
# unset would draw the partition from NumPy's global RNG, which changes on
# every run and defeats the fixed seed above.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, stratify=y, random_state=rs
)

# Label encoder; it is only constructed here and fitted further down.
y_trans = LabelBinarizer()


def array_islice(data, batch_size):
    """Yield consecutive row batches of ``data``.

    Args:
        data: Row-sliceable container exposing ``shape[0]``. Objects that
            provide ``.iloc`` (e.g. pandas Series/DataFrame) are sliced
            positionally through it; anything else is sliced directly with
            ``data[start:stop]``.
        batch_size: Maximum number of rows per batch; must be positive.

    Yields:
        Slices of at most ``batch_size`` rows, in order. The final slice is
        shorter when the row count is not a multiple of ``batch_size``;
        nothing is yielded for an empty container.

    Raises:
        ValueError: If ``batch_size`` is not positive (raised when iteration
            starts, since this is a generator).
    """
    if batch_size <= 0:
        raise ValueError(
            "batch_size must be positive, got {!r}".format(batch_size)
        )
    # Duck-type on `.iloc` instead of naming pandas classes, so the helper
    # works without importing pandas (which this file does not do).
    rows = getattr(data, "iloc", data)
    n_rows = data.shape[0]
    for start in range(0, n_rows, batch_size):
        # Slicing past the end is clipped, which produces the short tail batch.
        yield rows[start:start + batch_size]

# Features are used unchanged; labels are passed through the label encoder,
# which is fitted on the training labels only and then applied to the test
# labels.
x_train_t = x_train
y_train_t = y_trans.fit_transform(y_train)

x_test_t = x_test
y_test_t = y_trans.transform(y_test)

n_epoch = 25
# Roughly 25 mini-batches per epoch (plus a short remainder batch when the
# training-set size is not a multiple of 25).
batch_size = x_train_t.shape[0] // 25

# A single linear layer mapping features to one output per encoded label column.
model = torch.nn.Linear(x_train_t.shape[1], y_train_t.shape[1])
criterion = torch.nn.MultiLabelSoftMarginLoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

t0 = perf_counter()
# Per-epoch history of training/test loss and accuracy.
loss_train_seq = []
loss_test_seq = []
acc_train_seq = []
acc_test_seq = []

# The held-out tensors never change, so build them once rather than once per
# epoch. Targets are cast to float32 because the loss expects float targets.
x_test_in = torch.as_tensor(x_test_t, dtype=torch.float32)
y_test_in = torch.as_tensor(y_test_t, dtype=torch.float32)

with tqdm(desc='epochs', total=n_epoch) as progress:
    progress.set_postfix({'loss': None})
    for epoch in range(n_epoch):
        # Walk features and targets in lockstep, batch by batch.
        data_gen = zip(
            array_islice(x_train_t, batch_size),
            array_islice(y_train_t, batch_size),
        )

        loss_epoch_seq = []
        epoch_n_correct = 0
        epoch_n_total = 0
        for batch_x, batch_y in data_gen:
            # Plain tensors are sufficient; the autograd.Variable wrapper is
            # deprecated and no longer needed for gradient tracking.
            x_in = torch.as_tensor(batch_x, dtype=torch.float32)
            y_in = torch.as_tensor(batch_y, dtype=torch.float32)
            y_out = model(x_in)
            loss = criterion(y_out, y_in)
            loss_epoch_seq.append(loss.item())
            # Running accuracy uses the predictions made before this step's
            # parameter update; .item() keeps the counter a Python int.
            epoch_n_correct += (y_out.argmax(dim=1) == y_in.argmax(dim=1)).sum().item()
            epoch_n_total += y_in.shape[0]
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        loss_epoch = np.mean(loss_epoch_seq)
        loss_train_seq.append(loss_epoch)

        acc_epoch = float(epoch_n_correct) / float(epoch_n_total)
        acc_train_seq.append(acc_epoch)

        # Evaluation needs no gradients, so skip building the autograd graph.
        with torch.no_grad():
            y_test_out = model(x_test_in)
            loss_test = criterion(y_test_out, y_test_in).item()
            n_test_correct = (y_test_out.argmax(dim=1) == y_test_in.argmax(dim=1)).sum().item()
        loss_test_seq.append(loss_test)
        acc_test = float(n_test_correct) / y_test_in.shape[0]
        acc_test_seq.append(acc_test)

        progress.set_postfix({'loss_train': loss_epoch, 'acc_train': acc_epoch, 'loss_test': loss_test, 'acc_test': acc_test})
        progress.update(1)
t1 = perf_counter()
# Flush stderr before printing the elapsed wall-clock seconds to stdout
# (presumably so the progress bar output is complete first).
print(end='', flush=True, file=sys.stderr)
print(t1-t0)
	import sys
	from time import perf_counter

	import numpy as np
	import torch
	import torch.nn.functional as F
	from sklearn.datasets import make_classification
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import LabelBinarizer
	from tqdm import tqdm

	# One seeded generator drives every randomized data step below, so both the
	# synthetic dataset and the train/test partition are reproducible.
	rs = np.random.RandomState(4890702)

	# Synthetic 8-class problem: 5000 samples, 200 features (100 informative).
	x, y = make_classification(
		n_samples=5000,
		n_classes=8,
		n_features=200,
		n_informative=100,
		random_state=rs,
	)

	# Features as float32, integer class labels as int64.
	x = x.astype(np.float32)
	y = y.astype(np.int64)

	# Stratified 75/25 split. Reuse the seeded generator: leaving `random_state`
	# unset would draw the partition from NumPy's global RNG, which changes on
	# every run and defeats the fixed seed above.
	x_train, x_test, y_train, y_test = train_test_split(
		x, y, test_size=0.25, stratify=y, random_state=rs
	)

	# Label encoder; it is only constructed here and fitted further down.
	y_trans = LabelBinarizer()


	def array_islice(data, batch_size):
		"""Yield consecutive row batches of ``data``.

		Args:
			data: Row-sliceable container exposing ``shape[0]``. Objects that
				provide ``.iloc`` (e.g. pandas Series/DataFrame) are sliced
				positionally through it; anything else is sliced directly
				with ``data[start:stop]``.
			batch_size: Maximum number of rows per batch; must be positive.

		Yields:
			Slices of at most ``batch_size`` rows, in order. The final slice
			is shorter when the row count is not a multiple of
			``batch_size``; nothing is yielded for an empty container.

		Raises:
			ValueError: If ``batch_size`` is not positive (raised when
				iteration starts, since this is a generator).
		"""
		if batch_size <= 0:
			raise ValueError(
				"batch_size must be positive, got {!r}".format(batch_size)
			)
		# Duck-type on `.iloc` instead of naming pandas classes, so the helper
		# works without importing pandas (which this file does not do).
		rows = getattr(data, "iloc", data)
		n_rows = data.shape[0]
		for start in range(0, n_rows, batch_size):
			# Slicing past the end is clipped, which produces the short tail batch.
			yield rows[start:start + batch_size]

	# Features are used unchanged; labels are passed through the label encoder,
	# which is fitted on the training labels only and then applied to the test
	# labels.
	x_train_t = x_train
	y_train_t = y_trans.fit_transform(y_train)

	x_test_t = x_test
	y_test_t = y_trans.transform(y_test)

	n_epoch = 25
	# Roughly 25 mini-batches per epoch (plus a short remainder batch when the
	# training-set size is not a multiple of 25).
	batch_size = x_train_t.shape[0] // 25

	# A single linear layer mapping features to one output per encoded label column.
	model = torch.nn.Linear(x_train_t.shape[1], y_train_t.shape[1])
	criterion = torch.nn.MultiLabelSoftMarginLoss(reduction='mean')
	optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

	t0 = perf_counter()
	# Per-epoch history of training/test loss and accuracy.
	loss_train_seq = []
	loss_test_seq = []
	acc_train_seq = []
	acc_test_seq = []

	# The held-out tensors never change, so build them once rather than once per
	# epoch. Targets are cast to float32 because the loss expects float targets.
	x_test_in = torch.as_tensor(x_test_t, dtype=torch.float32)
	y_test_in = torch.as_tensor(y_test_t, dtype=torch.float32)

	with tqdm(desc='epochs', total=n_epoch) as progress:
		progress.set_postfix({'loss': None})
		for epoch in range(n_epoch):
			# Walk features and targets in lockstep, batch by batch.
			data_gen = zip(
				array_islice(x_train_t, batch_size),
				array_islice(y_train_t, batch_size),
			)

			loss_epoch_seq = []
			epoch_n_correct = 0
			epoch_n_total = 0
			for batch_x, batch_y in data_gen:
				# Plain tensors are sufficient; the autograd.Variable wrapper is
				# deprecated and no longer needed for gradient tracking.
				x_in = torch.as_tensor(batch_x, dtype=torch.float32)
				y_in = torch.as_tensor(batch_y, dtype=torch.float32)
				y_out = model(x_in)
				loss = criterion(y_out, y_in)
				loss_epoch_seq.append(loss.item())
				# Running accuracy uses the predictions made before this step's
				# parameter update; .item() keeps the counter a Python int.
				epoch_n_correct += (y_out.argmax(dim=1) == y_in.argmax(dim=1)).sum().item()
				epoch_n_total += y_in.shape[0]
				optimizer.zero_grad()
				loss.backward()
				optimizer.step()

			loss_epoch = np.mean(loss_epoch_seq)
			loss_train_seq.append(loss_epoch)

			acc_epoch = float(epoch_n_correct) / float(epoch_n_total)
			acc_train_seq.append(acc_epoch)

			# Evaluation needs no gradients, so skip building the autograd graph.
			with torch.no_grad():
				y_test_out = model(x_test_in)
				loss_test = criterion(y_test_out, y_test_in).item()
				n_test_correct = (y_test_out.argmax(dim=1) == y_test_in.argmax(dim=1)).sum().item()
			loss_test_seq.append(loss_test)
			acc_test = float(n_test_correct) / y_test_in.shape[0]
			acc_test_seq.append(acc_test)

			progress.set_postfix({'loss_train': loss_epoch, 'acc_train': acc_epoch, 'loss_test': loss_test, 'acc_test': acc_test})
			progress.update(1)
	t1 = perf_counter()
	# Flush stderr before printing the elapsed wall-clock seconds to stdout
	# (presumably so the progress bar output is complete first).
	print(end='', flush=True, file=sys.stderr)
	print(t1-t0)