Skip to content

Instantly share code, notes, and snippets.

@persiyanov
Last active May 28, 2018 14:38
Show Gist options
  • Save persiyanov/bceb706b2d617ebde69e11774fe8dc16 to your computer and use it in GitHub Desktop.
Word2Vec InMemory _train_epoch
def _train_epoch(self, data_iterables, cur_epoch=0, total_examples=None,
                 total_words=None, queue_factor=2, report_delay=1.0):
    """Train the model for one epoch over fully in-memory data streams.

    Unlike a streaming trainer, this variant first runs all job producers to
    completion (so the entire epoch's work sits in `job_queue`), and only then
    starts the worker threads that consume it.

    Parameters
    ----------
    data_iterables : iterable of iterables
        One input stream per producer thread; each is handed to
        `self._job_producer` together with the shared job queue.
    cur_epoch : int, optional
        Index of the current epoch, forwarded for progress reporting.
    total_examples : int or None, optional
        Expected number of examples, for progress estimation.
    total_words : int or None, optional
        Expected number of words, for progress estimation.
    queue_factor : int, optional
        Kept for interface compatibility; unused here because the job queue
        is unbounded.
    report_delay : float, optional
        Seconds between progress reports in `_log_epoch_progress`.

    Returns
    -------
    (int, int, int)
        Trained word count, raw word count and job tally, as returned by
        `self._log_epoch_progress`.
    """
    _reset_performance_metrics()
    job_queue = Queue(maxsize=0)  # infinite maxsize
    progress_queue = Queue(maxsize=0)  # infinite maxsize

    # Phase 1: fill the job queue completely before any training starts.
    producers = [
        threading.Thread(
            target=self._job_producer,
            args=(data_iterable, job_queue),
            kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words}
        ) for data_iterable in data_iterables
    ]
    logger.info('Starting filling job queue.')
    for thread in producers:
        thread.daemon = True  # make interrupting the process with ctrl+c easier
        thread.start()
    for i, thread in enumerate(producers):
        thread.join()
        # Lazy %-formatting: the message is only built if the record is emitted.
        logger.info('Job producer %i thread has finished.', i)

    # Give the workers a heads up that they can finish -- no more work!
    # One sentinel per worker so each worker sees exactly one None.
    # `range` (not the Py2-only `xrange`) keeps this portable across 2 and 3.
    for _ in range(self.workers):
        job_queue.put(None)

    # Phase 2: start the worker threads that train on the pre-filled queue.
    workers = [
        threading.Thread(target=self._worker_loop, args=(job_queue, progress_queue))
        for _ in range(self.workers)
    ]
    for thread in workers:
        thread.daemon = True  # make interrupting the process with ctrl+c easier
        thread.start()

    trained_word_count, raw_word_count, job_tally = self._log_epoch_progress(
        progress_queue, job_queue, cur_epoch=cur_epoch, total_examples=total_examples,
        total_words=total_words, report_delay=report_delay)
    return trained_word_count, raw_word_count, job_tally
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment