gugarosa/optimize.py

## optimize.py
from opytimizer import Opytimizer
from opytimizer.core.function import Function
from opytimizer.optimizers.abc import ABC
from opytimizer.spaces.search import SearchSpace
from sklearn.manifold import TSNE

import word2vec

# Word embeddings are loaded once, at import time, and read by `tsne` below
w2v = word2vec.load_word_vectors()


def tsne(opytimizer):
    """Fits t-SNE on the word embeddings and returns its KL divergence.

    Args:
        opytimizer: Indexable decision variables; the entry at `[0][0]` is
            rounded and used as the t-SNE perplexity.

    Returns:
        The `kl_divergence_` attribute of the fitted t-SNE model.

    """

    # The rounded first decision variable acts as the perplexity
    perplexity = round(opytimizer[0][0])

    # Building the model first, then fitting it on every vocabulary vector
    model = TSNE(perplexity=perplexity)
    model = model.fit(w2v.encoder[w2v.encoder.wv.vocab])

    return model.kl_divergence_


# Wrapping the t-SNE objective into a Function object
f = Function(pointer=tsne)

# Search budget: number of agents, decision variables and iterations
n_agents, n_variables, n_iterations = 5, 1, 10

# One lower and one upper bound per decision variable
lower_bound, upper_bound = [1], [100]

# Instantiating the SearchSpace from the settings above
s = SearchSpace(
    n_agents=n_agents,
    n_iterations=n_iterations,
    n_variables=n_variables,
    lower_bound=lower_bound,
    upper_bound=upper_bound,
)

# ABC's optimizer together with its hyperparameters
hyperparams = dict(n_trials=10)
p = ABC(hyperparams=hyperparams)

# Bundling space, optimizer and function into an Opytimizer
o = Opytimizer(space=s, optimizer=p, function=f)

# Executing the optimization task and persisting its history
history = o.start()
history.save('abc.pkl')

## plot_computational_load.py
import matplotlib.pyplot as plt
import numpy as np
from opytimizer.utils.history import History

# Algorithms whose timings are compared
algorithms = ['ABC', 'BA', 'GP', 'PSO']

# Dataset name used to locate the result files
dataset = 'Reuters'

# Figure and axes that receive one curve per algorithm
fig, ax = plt.subplots()

for a in algorithms:
    # One time entry per running of the current algorithm
    y = np.zeros(10)

    for i in range(10):
        # Each running is read through its own, freshly created History
        h = History()

        # Populating it from the running's pickle file
        file_name = f'../results/{dataset}/{a}/{a}_{dataset}_{i}.pkl'
        h.load(file_name)

        # Keeping the first entry of the loaded `time` attribute
        y[i] = h.time[0]

    # Runnings are numbered from 1 to 10 on the horizontal axis
    ax.plot(np.arange(1, 11, 1), y, '-', label=a)

# Labels, title and horizontal range
ax.set(xlabel='running', ylabel='time (s)',
       title='Computational load for Reuters-21578 (ApteMod) dataset')
ax.set_xlim([1, 10])

# Grid and legend
ax.grid()
ax.legend()

# Displaying plot
plt.show()

## plot_convergence.py
import matplotlib.pyplot as plt
import numpy as np
from opytimizer.utils.history import History

# Declaring all possible algorithms
algorithms = ['ABC', 'BA', 'PSO']

# Dataset to be analyzed
dataset = 'Reuters'

# Number of runnings stored for each algorithm (one pickle file per running)
n_runs = 10

# Number of iterations accumulated from each running's history
# NOTE(review): presumably matches the `n_iterations` used by the
# optimization task that produced the files - confirm
n_iterations = 10

# Creating an empty plot
fig, ax = plt.subplots()

# For every algorithm
for a in algorithms:
    # Accumulator with one slot per iteration
    y = np.zeros(n_iterations)

    # For every running
    for i in range(n_runs):
        # Declares the file name to be loaded
        file_name = f'../results/{dataset}/{a}/{a}_{dataset}_{i}.pkl'

        # A fresh History per file, so the values read below can only come
        # from this file (same approach as the computational load script)
        h = History()

        # Loading content from pickle file
        h.load(file_name)

        # For every best agent
        for j, best in enumerate(h.best_agent):
            # Summing the second element of the record to its position
            # NOTE(review): presumably the fitness value - confirm
            y[j] += best[1]

    # The mean is taken over the runnings, not over the iterations
    y = y / n_runs

    # Plotting the algorithm's convergence
    ax.plot(np.arange(1, n_iterations + 1, 1), y, '-', label=a)

# Setting title and labels
ax.set(xlabel='iteration', ylabel='fitness',
       title='Mean convergence plot for Reuters-21578 (ApteMod) dataset')

# Setting axis limits
ax.set_xlim([1, n_iterations])

# Setting global properties
ax.grid()
ax.legend()

# Displaying plot
plt.show()

## plot_similar_words_2d.py
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import word2vec

# Loading word2vec word embeddings
w2v = word2vec.load_word_vectors()

# Row of every word inside the embedding matrix built further below.
# The matrix is created by iterating over the vocabulary, so rows are looked
# up through that same iteration order.
# NOTE(review): the previous version used each entry's `.index` instead,
# which is only correct if it matches the iteration order - confirm
row_of = {w: i for i, w in enumerate(w2v.encoder.wv.vocab)}

# Defining main word to be searched and its row
word = 'corp'
word_index = row_of[word]

# Gathering the similarity values for all vocabulary
similar_words = w2v.encoder.similar_by_word(word, topn=len(w2v.encoder.wv.vocab))

# Number of positive and negative words
n = 30

# The first n entries of the similarity list
pos_words = similar_words[:n]

# The last n entries, walked backwards. A reversed slice excludes its stop
# position, hence `-n - 1` (using `-n` would return only n - 1 entries).
neg_words = similar_words[:-n - 1:-1]

# Rows of the positive and negative words
pos_index = [row_of[s[0]] for s in pos_words]
neg_index = [row_of[s[0]] for s in neg_words]

# Gathering perplexity value
perplexity = 30

# Training and transforming data using t-SNE
tsne = TSNE(perplexity=perplexity).fit_transform(
    w2v.encoder[w2v.encoder.wv.vocab])

# Creating a figure and its subplots
fig, ax = plt.subplots()

# For the top-n positive words
for i in pos_index:
    # Plot the words as blue points
    ax.plot(tsne[i, 0], tsne[i, 1], 'bo')

# For the top-n negative words
for i in neg_index:
    # Plot the words as red points
    ax.plot(tsne[i, 0], tsne[i, 1], 'ro')

# Plot the main word
ax.plot(tsne[word_index, 0], tsne[word_index, 1], 'yo')

# Setting title
ax.set_title(f'Top-{n} positive and negative words similar to: `{word}`')

# Removing the axis ticks
# ax.set_yticklabels([])
# ax.set_xticklabels([])

# For the top-n positive words
for (s, index) in zip(pos_words, pos_index):
    # Annotate the word and its similarity level
    plt.annotate(f'{s[0]} ({s[1]:.2f})', xy=(tsne[index, 0], tsne[index, 1]))

# For the top-n negative words
for (s, index) in zip(neg_words, neg_index):
    # Annotate the word and its similarity level
    plt.annotate(f'{s[0]} ({s[1]:.2f})', xy=(tsne[index, 0], tsne[index, 1]))

# Annotate the main word
plt.annotate(word, xy=(tsne[word_index, 0], tsne[word_index, 1]))

# Shows the plot
plt.show()

## plot_tsne.py
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

import word2vec

# Loading word2vec word embeddings
w2v = word2vec.load_word_vectors()

# Gathering perplexity value
perplexity = 99.99

# Training and transforming data using t-SNE
tsne = TSNE(perplexity=perplexity, random_state=0).fit_transform(
    w2v.encoder[w2v.encoder.wv.vocab])

# Vocabulary words, listed in the iteration order used to build the matrix
words = list(w2v.encoder.wv.vocab)

# Number of words to display, capped by the vocabulary size so that neither
# the points nor the labels index past the available rows
n_words = min(50, len(words))

# Plotting first `n_words` words from the dataset
fig, ax = plt.subplots()
for i in range(n_words):
    ax.plot(tsne[i, 0], tsne[i, 1], 'o')
ax.set_title(f'Reuters-21578 (ApteMod) dataset: Perplexity = {perplexity}')
ax.set_yticklabels([])
ax.set_xticklabels([])

# Adding words' labels to exactly the points plotted above
for i, word in enumerate(words[:n_words]):
    plt.annotate(word, xy=(tsne[i, 0], tsne[i, 1]))
plt.show()

## word2vec.py
import nalp.stream.loader as l
import nalp.stream.preprocess as p
from nalp.encoders.word2vec import Word2Vec
from sklearn.datasets import fetch_20newsgroups, load_files


def load_word_vectors():
    """Learns a Word2Vec encoding from the pre-processed 20 Newsgroups data.

    Returns:
        The Word2Vec object after its `learn` method has been called.

    """

    # 20 Newsgroups dataset
    dataset = fetch_20newsgroups(subset='all')

    # Movie reviews dataset
    # dataset = load_files('data/movie_reviews')

    # Reuters-21578 (ApteMod) dataset
    # dataset = load_files('data/reuters')

    # Pre-processing pipeline built from the three steps below
    steps = (p.lower_case, p.valid_char, p.tokenize_to_word)
    pipe = p.pipeline(*steps)

    # Every document is cast to a string and sent through the pipeline
    X = []
    for document in dataset.data:
        X.append(pipe(str(document)))

    # Creating a Word2Vec (Encoder's child) class
    e = Word2Vec()

    # Learning the encoding representation from the processed documents
    e.learn(X, max_features=300, min_count=100)

    return e
	from opytimizer import Opytimizer
	from opytimizer.core.function import Function
	from opytimizer.optimizers.abc import ABC
	from opytimizer.spaces.search import SearchSpace
	from sklearn.manifold import TSNE

	import word2vec

	# Loading word2vec word embeddings
	w2v = word2vec.load_word_vectors()


	def tsne(opytimizer):
		"""Fits t-SNE on the word embeddings and returns its KL divergence.

		Args:
			opytimizer: Indexable decision variables; the entry at `[0][0]`
				is rounded and used as the t-SNE perplexity.

		Returns:
			The `kl_divergence_` attribute of the fitted t-SNE model.

		"""

		# Gathering hyperparams
		perplexity = round(opytimizer[0][0])

		# Training a t-SNE model (named `model` so that it does not shadow
		# the function's own name)
		model = TSNE(perplexity=perplexity).fit(w2v.encoder[w2v.encoder.wv.vocab])

		return model.kl_divergence_


	# Wrapping the t-SNE objective into a Function object
	f = Function(pointer=tsne)

	# Search budget: number of agents, decision variables and iterations
	n_agents, n_variables, n_iterations = 5, 1, 10

	# One lower and one upper bound per decision variable
	lower_bound, upper_bound = [1], [100]

	# Instantiating the SearchSpace from the settings above
	s = SearchSpace(
		n_agents=n_agents,
		n_iterations=n_iterations,
		n_variables=n_variables,
		lower_bound=lower_bound,
		upper_bound=upper_bound,
	)

	# ABC's optimizer together with its hyperparameters
	hyperparams = dict(n_trials=10)
	p = ABC(hyperparams=hyperparams)

	# Bundling space, optimizer and function into an Opytimizer
	o = Opytimizer(space=s, optimizer=p, function=f)

	# Executing the optimization task and persisting its history
	history = o.start()
	history.save('abc.pkl')
	import matplotlib.pyplot as plt
	import numpy as np
	from opytimizer.utils.history import History

	# Declaring all possible algorithms
	algorithms = ['ABC', 'BA', 'GP', 'PSO']

	# Dataset to be analyzed
	dataset = 'Reuters'

	# Creating an empty plot
	fig, ax = plt.subplots()

	# For every algorithm
	for a in algorithms:
		# Create an empty vector of runnings
		y = np.zeros(10)

		# For every running
		for i in range(10):
			# Declares the file name to be loaded
			file_name = f'../results/{dataset}/{a}/{a}_{dataset}_{i}.pkl'

			# Creating an empty History object
			h = History()

			# Loading content from pickle file
			h.load(file_name)

			# Keeping the first entry of the loaded `time` attribute
			y[i] = h.time[0]

		# Plotting the algorithm's computational load, once per algorithm
		ax.plot(np.arange(1, 11, 1), y, '-', label=a)

	# Setting title and labels
	ax.set(xlabel='running', ylabel='time (s)',
		title='Computational load for Reuters-21578 (ApteMod) dataset')

	# Setting axis limits
	ax.set_xlim([1, 10])

	# Setting global properties
	ax.grid()
	ax.legend()

	# Displaying plot
	plt.show()
	import nalp.stream.loader as l
	import nalp.stream.preprocess as p
	from nalp.encoders.word2vec import Word2Vec
	from sklearn.datasets import fetch_20newsgroups, load_files


	def load_word_vectors():
		"""Learns a Word2Vec encoding from the pre-processed 20 Newsgroups data.

		Returns:
			The Word2Vec object after its `learn` method has been called.

		"""

		# 20 Newsgroups dataset
		dataset = fetch_20newsgroups(subset='all')

		# Movie reviews dataset
		# dataset = load_files('data/movie_reviews')

		# Reuters-21578 (ApteMod) dataset
		# dataset = load_files('data/reuters')

		# Creates a pre-processing pipeline
		pipe = p.pipeline(
			p.lower_case,
			p.valid_char,
			p.tokenize_to_word
		)

		# Applying pre-processing pipeline to X
		X = [pipe(str(x)) for x in dataset.data]

		# Creating a Word2Vec (Encoder's child) class
		e = Word2Vec()

		# Calling its internal method to learn an encoding representation
		e.learn(X, max_features=300, min_count=100)

		return e