Gaarv (Oslo, Norway)
@Gaarv
Gaarv / spark_parallel_boost.py
Last active November 24, 2016 14:43 — forked from wpm/spark_parallel_boost.py
A simple example of how to integrate the Spark parallel computing framework and the scikit-learn machine learning toolkit. This script randomly generates test and train data sets, trains an ensemble of decision trees using boosting, and applies the ensemble to the test set. The ensemble training is done in parallel.
from pyspark import SparkContext
import numpy as np
from sklearn.model_selection import train_test_split, ShuffleSplit
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
def run(sc):
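
The preview stops at the imports and the run(sc) signature, so a compact sketch helps show the shape of the approach: the following is my own illustrative code, not the gist's, training independent trees on bootstrap resamples in parallel Spark tasks and combining them by vote. Note that this parallel pattern is bagging; boosting proper is sequential, so only the independent tree training parallelizes this way.

import numpy as np
from pyspark import SparkContext
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

sc = SparkContext(appName="parallel-trees")
X, y = make_classification(n_samples=2000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

def train_tree(seed):
    # Each Spark task fits one tree on a bootstrap sample of the training set;
    # the training arrays are shipped to workers inside the closure.
    rng = np.random.RandomState(seed)
    idx = rng.randint(0, len(X_train), len(X_train))
    return DecisionTreeClassifier(random_state=seed).fit(X_train[idx], y_train[idx])

trees = sc.parallelize(range(10), numSlices=10).map(train_tree).collect()
votes = np.mean([t.predict(X_test) for t in trees], axis=0)
print("ensemble accuracy:", accuracy_score(y_test, votes > 0.5))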
@Gaarv
Gaarv / gist:cff71de21bf410d80aff1c032aa6caf9
Created April 5, 2017 09:09 — forked from piotrga/gist:1520363
Matrix multiplication with parallel collections
override def multiply(m1: Array[Array[Double]], m2: Array[Array[Double]]): Array[Array[Double]] = {
  val res = Array.ofDim[Double](m1.length, m2(0).length)
  val M1_COLS = m1(0).length
  val M1_ROWS = m1.length
  val M2_COLS = m2(0).length

  @inline def singleThreadedMultiplicationFAST(start_row: Int, end_row: Int): Unit = {
    var col, i = 0
    var sum = 0.0
    var row = start_row
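
The preview shows only the single-threaded inner kernel; the gist's parallel collections then split the output rows across threads. As a rough cross-language analogue (my own sketch, not the gist's code), the same row-partitioning strategy in Python looks like this:

import numpy as np
from concurrent.futures import ProcessPoolExecutor

def _multiply_block(args):
    # One worker multiplies a contiguous block of m1's rows against all of m2.
    m1_block, m2 = args
    return m1_block @ m2

def parallel_multiply(m1, m2, workers=4):
    # Split the rows into blocks, multiply the blocks in parallel processes,
    # and stack the partial results back into the full product.
    blocks = np.array_split(m1, workers)
    with ProcessPoolExecutor(max_workers=workers) as ex:
        return np.vstack(list(ex.map(_multiply_block, [(b, m2) for b in blocks])))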
@Gaarv
Gaarv / (gist name and date lost in the page capture): CountVectorizer with a MARISA-trie vocabulary
import numpy as np
import marisa_trie
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.externals import six
class MarisaCountVectorizer(CountVectorizer):
    # ``CountVectorizer.fit`` calls ``fit_transform``, so a separate
    # ``fit`` override is not needed.
    def fit_transform(self, raw_documents, y=None):
import marisa_trie
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
# hack to store vocabulary in MARISA Trie
class _MarisaVocabularyMixin(object):
    def fit_transform(self, raw_documents, y=None):
        super(_MarisaVocabularyMixin, self).fit_transform(raw_documents)
        self._freeze_vocabulary()
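
Both previews cut off before the interesting step. A sketch of that step (assuming the attribute names used by recent scikit-learn, such as fixed_vocabulary_ and stop_words_): after fitting, swap the dict vocabulary for a marisa_trie.Trie, which assigns its own integer ids to the keys, then re-transform so column indices match the trie.

import marisa_trie
from sklearn.feature_extraction.text import CountVectorizer

class MarisaCountVectorizer(CountVectorizer):
    def fit_transform(self, raw_documents, y=None):
        super().fit_transform(raw_documents)       # builds the dict vocabulary
        self._freeze_vocabulary()
        return super().transform(raw_documents)    # re-map to the trie's ids

    def _freeze_vocabulary(self):
        if not self.fixed_vocabulary_:
            # A trie stores the keys far more compactly than a dict and
            # acts as a mapping from token to integer id.
            self.vocabulary_ = marisa_trie.Trie(self.vocabulary_.keys())
            self.fixed_vocabulary_ = True
            del self.stop_words_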
@Gaarv
Gaarv / keras_gensim_embeddings.py
Created August 22, 2017 15:09 — forked from codekansas/keras_gensim_embeddings.py
Using Word2Vec embeddings in Keras models
from __future__ import print_function
import json
import os
import numpy as np
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
from keras.engine import Input
from keras.layers import Embedding, merge
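
The preview stops at the imports; the core move is copying the trained word vectors into a (typically frozen) Embedding layer. A sketch under current gensim 4 and Keras names (the 2017 gist used older APIs such as keras.engine.Input and merge):

import numpy as np
from gensim.models import Word2Vec
from keras.initializers import Constant
from keras.layers import Embedding

sentences = [["hello", "world"], ["hello", "keras"]]
w2v = Word2Vec(sentences=sentences, vector_size=50, min_count=1)
weights = w2v.wv.vectors                       # (vocab_size, vector_size)

embedding = Embedding(
    input_dim=weights.shape[0],
    output_dim=weights.shape[1],
    embeddings_initializer=Constant(weights),  # seed with the Word2Vec vectors
    trainable=False,                           # freeze the pretrained vectors
)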
@Gaarv
Gaarv / attention_lstm.py
Created September 8, 2017 15:38 — forked from mbollmann/attention_lstm.py
My attempt at creating an LSTM with attention in Keras
class AttentionLSTM(LSTM):
    """LSTM with attention mechanism

    This is an LSTM incorporating an attention mechanism into its hidden states.
    Currently, the context vector calculated from the attended vector is fed
    into the model's internal states, closely following the model by Xu et al.
    (2016, Sec. 3.1.2), using a soft attention model following
    Bahdanau et al. (2014).

    The layer expects two inputs instead of the usual one:
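
To make the docstring concrete, here is a toy NumPy sketch of soft attention (a dot-product scoring variant chosen for brevity; Bahdanau et al. score with a small additive network instead):

import numpy as np

def soft_attention(h, attended):
    # h: (dim,) current hidden state; attended: (steps, dim) vectors to attend over
    scores = attended @ h                  # one alignment score per time step
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()               # softmax over time steps
    return weights @ attended              # context vector fed back into the state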
@Gaarv
Gaarv / residual_network.py
Created September 18, 2017 08:56 — forked from justiceamoh/residual_network.py
Clean and simple Keras implementation of residual networks (ResNeXt and ResNet) accompanying Deep Residual Learning: https://blog.waya.ai/deep-residual-learning-9610bb62c355.
"""
Clean and simple Keras implementation of network architectures described in:
- (ResNet-50) [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf).
- (ResNeXt-50 32x4d) [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/pdf/1611.05431.pdf).
Python 3.
"""
def residual_network(x):
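
The preview ends at the function signature; the building block it assembles is the residual unit from the ResNet paper. A minimal sketch (layer choices are illustrative, not the gist's exact configuration):

from keras import layers

def residual_block(x, filters):
    # Assumes x already has `filters` channels; otherwise project the
    # shortcut with a 1x1 convolution before the add.
    shortcut = x
    y = layers.Conv2D(filters, 3, padding="same")(x)
    y = layers.BatchNormalization()(y)
    y = layers.ReLU()(y)
    y = layers.Conv2D(filters, 3, padding="same")(y)
    y = layers.BatchNormalization()(y)
    y = layers.add([y, shortcut])  # identity shortcut: the block learns a residual
    return layers.ReLU()(y)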
@Gaarv
Gaarv / Graph
Created January 8, 2018 12:51 — forked from printminion/Graph
Status: not working
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@Gaarv
Gaarv / (gist name and date lost in the page capture): patch to submit.py
# 1 - change in submit.py from:
def load_input_data(file_location):
    with open(file_location, 'r') as input_data_file:
        input_data = ''.join(input_data_file.readlines())
    return input_data
# to:
def load_input_data(file_location):
    return file_location
@Gaarv
Gaarv / serialization.sc
Created December 28, 2018 09:10 — forked from laughedelic/serialization.sc
Shows how to serialize-deserialize an object in Scala to a String
import java.io._
import java.util.Base64
import java.nio.charset.StandardCharsets.UTF_8
def serialise(value: Any): String = {
  val stream: ByteArrayOutputStream = new ByteArrayOutputStream()
  val oos = new ObjectOutputStream(stream)
  oos.writeObject(value)
  oos.close()
  // Base64-encode the serialized bytes so they round-trip safely as a String.
  new String(
    Base64.getEncoder.encode(stream.toByteArray),
    UTF_8
  )
}