Christos Baziotis cbaziotis

## preprocess_multilingual_denoising.py
#!/bin/bash
#-----------------------------------------------------------------------------------------------------------------------
# This script contains the preprocessing pipeline for some predefined datasets.
# 1. It learns a joint sentencepiece model on a subset of the data (training side)
# 2. It tokenizes all the data with the sentencepiece model
# 3. It binarizes the data for training with fairseq
#-----------------------------------------------------------------------------------------------------------------------

DATASET=$1
#DATASET=flores_neen

## pytorch_resume_training.py

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F

import matplotlib.pyplot as plt

import numpy as np

## SelfAttention.py
class SelfAttention(nn.Module):
    """
    Attention module whose constructor registers a learnable weight vector,
    a softmax over the last dimension and an optional non-linearity.

    NOTE(review): only the constructor is visible in this excerpt and it
    appears to be truncated -- no forward pass is shown here.
    """

    def __init__(self, attention_size, batch_first=False, non_linearity="tanh"):
        """
        :param attention_size: size passed to ``torch.FloatTensor`` to
            allocate the attention weight vector
        :param batch_first: flag stored on the module as-is; it is not used
            anywhere in the visible code
        :param non_linearity: name of the activation; only ``"relu"`` is
            handled in the visible code
        """
        super(SelfAttention, self).__init__()

        self.batch_first = batch_first
        # torch.FloatTensor(n) allocates without initialising, so the weights
        # hold arbitrary values until they are explicitly initialised.
        # NOTE(review): `Parameter` is not imported in the visible excerpt --
        # presumably torch.nn.Parameter; confirm the import exists.
        self.attention_weights = Parameter(torch.FloatTensor(attention_size))
        # Normalises over the last dimension of its input.
        self.softmax = nn.Softmax(dim=-1)

        # NOTE(review): the default "tanh" has no visible branch, so with the
        # code shown here self.non_linearity is only set for "relu" -- the
        # remaining branch is presumably cut off; verify against the full file.
        if non_linearity == "relu":
            self.non_linearity = nn.ReLU()

## ubuntu16_tensorflow_cuda8.sh
# This is a shortened version of the blog post:
#  http://ksopyla.com/2017/02/tensorflow-gpu-virtualenv-python3/

# update packages
sudo apt-get update
sudo apt-get upgrade

#Add the ppa repo for NVIDIA graphics driver
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt-get update

## Deep Learning with Python 3 on ubuntu 16.04.sh
# This is inspired by the fantastic guide https://github.com/saiprashanths/dl-setup
# I have just updated the python-related commands so that everything works in Python 3.
# Tested on Xubuntu 16.04.

# First of all let's update the repos:
sudo apt-get update

# Only if you have a CUDA-compatible Nvidia card, install CUDA.
# Check on the Nvidia website what is the latest driver version which supports your card.
# At the time of this writing it was 367.

## class_weights.py
def get_class_weights(y, smooth_factor=0):
    """
    Returns the weights for each class based on the frequencies of the samples
    :param smooth_factor: factor that smooths extremely uneven weights
    :param y: list of true labels (the labels must be hashable)
    :return: dictionary with the weight for each class
    """
    counter = Counter(y)

    if smooth_factor > 0:

## AttentionWithContext.py
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':

## Attention.py

from keras import backend as K, initializers, regularizers, constraints
from keras.engine.topology import Layer


def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
	#!/bin/bash
	#-----------------------------------------------------------------------------------------------------------------------
	# This script contains the preprocessing pipeline for some predefined datasets.
	# 1. It learns a joint sentencepiece model on a subset of the data (training side)
	# 2. It tokenizes all the data with the sentencepiece model
	# 3. It binarizes the data for training with fairseq
	#-----------------------------------------------------------------------------------------------------------------------

	DATASET=$1
	#DATASET=flores_neen

	import torch
	import torch.nn as nn
	import torch.optim as optim
	from torch.autograd import Variable
	import torch.nn.functional as F

	import matplotlib.pyplot as plt

	import numpy as np
	class SelfAttention(nn.Module):
	def __init__(self, attention_size, batch_first=False, non_linearity="tanh"):
	super(SelfAttention, self).__init__()

	self.batch_first = batch_first
	self.attention_weights = Parameter(torch.FloatTensor(attention_size))
	self.softmax = nn.Softmax(dim=-1)

	if non_linearity == "relu":
	self.non_linearity = nn.ReLU()
	# This is a shortened version of the blog post:
	# http://ksopyla.com/2017/02/tensorflow-gpu-virtualenv-python3/

	# update packages
	sudo apt-get update
	sudo apt-get upgrade

	#Add the ppa repo for NVIDIA graphics driver
	sudo add-apt-repository ppa:graphics-drivers/ppa
	sudo apt-get update
	# This is inspired by the fantastic guide https://github.com/saiprashanths/dl-setup
	# I have just updated the python-related commands so that everything works in Python 3.
	# Tested on Xubuntu 16.04.

	# First of all let's update the repos:
	sudo apt-get update

	# Only if you have a CUDA-compatible Nvidia card, install CUDA.
	# Check on the Nvidia website what is the latest driver version which supports your card.
	# At the time of this writing it was 367.
	def get_class_weights(y, smooth_factor=0):
	"""
	Returns the weights for each class based on the frequencies of the samples
	:param smooth_factor: factor that smooths extremely uneven weights
	:param y: list of true labels (the labels must be hashable)
	:return: dictionary with the weight for each class
	"""
	counter = Counter(y)

	if smooth_factor > 0:
	def dot_product(x, kernel):
	"""
	Wrapper for dot product operation, in order to be compatible with both
	Theano and Tensorflow
	Args:
	x (): input
	kernel (): weights
	Returns:
	"""
	if K.backend() == 'tensorflow':

	from keras import backend as K, initializers, regularizers, constraints
	from keras.engine.topology import Layer


	def dot_product(x, kernel):
	"""
	Wrapper for dot product operation, in order to be compatible with both
	Theano and Tensorflow
	Args: