montanalow/my_app.pipelines.product_popularity.get_encoders.py

## my_app.pipelines.product_popularity.get_encoders.py
# my_app/pipelines/product_popularity.py part 2

    def get_encoders(self):
        """Build the input encoders for this pipeline.

        Returns:
            A 2-tuple: the encoder for 'product_name' followed by the
            encoder for 'department'.
        """
        # Product names are tokenized into at most 15 tokens, using tokens
        # that occur in the corpus at least 10 times. embed_scale=5 asks the
        # estimator to spend 5x as many resources on name vs department,
        # since there are so many more words in English than there are
        # grocery store departments.
        name_encoder = Token(
            'product_name',
            sequence_length=15,
            minimum_occurrences=10,
            embed_scale=5,
        )
        # Department names are translated into unique identifiers, for
        # names that occur at least 50 times.
        department_encoder = Unique('department', minimum_occurrences=50)
        return (name_encoder, department_encoder)

    def get_output_encoder(self):
        """Return the encoder used for the 'sales' output."""
        # Sales is floating point, so it could be Pass encoded straight
        # through to the estimator. Norm is used instead because it brings
        # the values to a small range around 0, which is more amenable to
        # deep learning.
        sales_encoder = Norm('sales')
        return sales_encoder
	# my_app/pipelines/product_popularity.py part 2

	def get_encoders(self):
		"""Build the input encoders for this pipeline.

		Returns:
			A 2-tuple: the encoder for 'product_name' followed by the
			encoder for 'department'.
		"""
		return (
			# An encoder to tokenize product names into max 15 tokens that
			# occur in the corpus at least 10 times. We also want the
			# estimator to spend 5x as many resources on name vs department
			# since there are so many more words in English than there are
			# grocery store departments.
			Token('product_name', sequence_length=15, minimum_occurrences=10, embed_scale=5),
			# An encoder to translate department names into unique
			# identifiers that occur at least 50 times.
			Unique('department', minimum_occurrences=50),
		)

	def get_output_encoder(self):
		"""Return the encoder used for the 'sales' output."""
		# Sales is floating point which we could Pass encode directly to the
		# estimator, but Norm will bring it to small values around 0,
		# which are more amenable to deep learning.
		return Norm('sales')