@jamesonthecrow
Created March 5, 2019 04:29
import coremltools
from coremltools.models.neural_network import quantization_utils


def quantize_model(mlmodel, nbits, method='linear'):
    """Quantize the weights of an mlmodel to a specific number of bits.

    Args:
        mlmodel (coremltools.models.MLModel): A Core ML model.
        nbits (int): The bit depth of the quantized weights.
        method (str): The quantization method.

    Returns:
        A quantized Core ML model.
    """
    quantized_model = quantization_utils.quantize_weights(
        mlmodel, nbits, method)
    # On macOS, quantization returns an MLModel; on other platforms it
    # returns only the model spec, so we wrap the spec in an MLModel.
    if isinstance(quantized_model, coremltools.models.MLModel):
        return quantized_model
    return coremltools.models.MLModel(quantized_model)
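For intuition about what linear weight quantization does under the hood, here is a minimal, self-contained sketch (an illustration of the arithmetic, not the coremltools implementation): each float weight is mapped to one of 2**nbits evenly spaced levels spanning the weight range, then mapped back to a float.

```python
def linear_quantize(weights, nbits):
    """Linearly quantize float weights to nbits levels, then dequantize.

    Maps each weight to one of 2**nbits evenly spaced levels covering
    [min(weights), max(weights)], then converts back to floats so the
    rounding error introduced by quantization is visible.
    """
    levels = 2 ** nbits - 1
    lo, hi = min(weights), max(weights)
    scale = (hi - lo) / levels if hi != lo else 1.0
    quantized = [round((w - lo) / scale) for w in weights]
    return [q * scale + lo for q in quantized]


weights = [-1.0, -0.25, 0.0, 0.5, 1.0]
print(linear_quantize(weights, 8))  # near the originals at 8 bits
print(linear_quantize(weights, 1))  # collapses to two levels at 1 bit
```

At 8 bits the reconstruction error per weight is at most half a quantization step, while at 1 bit every weight snaps to one of only two values, which is why lower bit depths shrink the model but can hurt accuracy.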