wkcn/op.h

## op.h
/*!
*  Copyright (c) 2019 by Contributors
* \file op.h
* \brief definition of all the operators
* \author Chuntao Hong, Xin Li
*/

#ifndef MXNET_CPP_OP_H_
#define MXNET_CPP_OP_H_

#include <string>
#include <vector>
#include "mxnet-cpp/base.h"
#include "mxnet-cpp/shape.h"
#include "mxnet-cpp/op_util.h"
#include "mxnet-cpp/operator.h"
#include "dmlc/optional.h"
#include "nnvm/tuple.h"

namespace mxnet {
namespace cpp {

/*!
 * \brief Returns result of first array elements raised to powers from second array,
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_power(x, y) = [[ 2.,  2.,  2.],
 *        [ 4.,  4.,  4.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L45
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_power(const std::string& symbol_name,
                              Symbol lhs,
                              Symbol rhs) {
  return Operator("broadcast_power")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise maximum of the input arrays with broadcasting.
 *
 *        This function compares two input arrays and returns a new array having the
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_maximum(x, y) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L80
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_maximum(const std::string& symbol_name,
                                Symbol lhs,
                                Symbol rhs) {
  return Operator("broadcast_maximum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise minimum of the input arrays with broadcasting.
 *
 *        This function compares two input arrays and returns a new array having the
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_maximum(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L115
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_minimum(const std::string& symbol_name,
                                Symbol lhs,
                                Symbol rhs) {
  return Operator("broadcast_minimum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the hypotenuse of a right angled triangle, given its "legs"
 *        with broadcasting.
 *
 *        It is equivalent to doing :math:`sqrt(x_1^2 + x_2^2)`.
 *
 *        Example::
 *
 *        x = [[ 3.,  3.,  3.]]
 *
 *        y = [[ 4.],
 *        [ 4.]]
 *
 *        broadcast_hypot(x, y) = [[ 5.,  5.,  5.],
 *        [ 5.,  5.,  5.]]
 *
 *        z = [[ 0.],
 *        [ 4.]]
 *
 *        broadcast_hypot(x, z) = [[ 3.,  3.,  3.],
 *        [ 5.,  5.,  5.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L156
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_hypot(const std::string& symbol_name,
                              Symbol lhs,
                              Symbol rhs) {
  return Operator("broadcast_hypot")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _power(const std::string& symbol_name,
                     Symbol lhs,
                     Symbol rhs) {
  return Operator("_power")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _maximum(const std::string& symbol_name,
                       Symbol lhs,
                       Symbol rhs) {
  return Operator("_maximum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _minimum(const std::string& symbol_name,
                       Symbol lhs,
                       Symbol rhs) {
  return Operator("_minimum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Given the "legs" of a right triangle, return its hypotenuse.
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_op_extended.cc:L79
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _hypot(const std::string& symbol_name,
                     Symbol lhs,
                     Symbol rhs) {
  return Operator("_hypot")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the square sum of array elements over a given axis
 *        for row-sparse matrix. This is a temporary solution for fusing ops square and
 *        sum together for row-sparse matrix to save memory for storing gradients.
 *        It will become deprecated once the functionality of fusing operators is finished
 *        in the future.
 *
 *        Example::
 *
 *        dns = mx.nd.array([[0, 0], [1, 2], [0, 0], [3, 4], [0, 0]])
 *        rsp = dns.tostype('row_sparse')
 *        sum = mx.nd._internal._square_sum(rsp, axis=1)
 *        sum = [0, 5, 0, 25, 0]
 *
 *
 *        Defined in src/operator/tensor/square_sum.cc:L63
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol _square_sum(const std::string& symbol_name,
                          Symbol data,
                          dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                          bool keepdims = false,
                          bool exclude = false) {
  return Operator("_square_sum")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Adds arguments element-wise.
 *
 *        The storage type of ``elemwise_add`` output depends on storage types of inputs
 *
 *        - elemwise_add(row_sparse, row_sparse) = row_sparse
 *        - elemwise_add(csr, csr) = csr
 *        - elemwise_add(default, csr) = default
 *        - elemwise_add(csr, default) = default
 *        - elemwise_add(default, rsp) = default
 *        - elemwise_add(rsp, default) = default
 *        - otherwise, ``elemwise_add`` generates output with default storage
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_add(const std::string& symbol_name,
                           Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _grad_add(const std::string& symbol_name,
                        Symbol lhs,
                        Symbol rhs) {
  return Operator("_grad_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Subtracts arguments element-wise.
 *
 *        The storage type of ``elemwise_sub`` output depends on storage types of inputs
 *
 *        - elemwise_sub(row_sparse, row_sparse) = row_sparse
 *        - elemwise_sub(csr, csr) = csr
 *        - elemwise_sub(default, csr) = default
 *        - elemwise_sub(csr, default) = default
 *        - elemwise_sub(default, rsp) = default
 *        - elemwise_sub(rsp, default) = default
 *        - otherwise, ``elemwise_sub`` generates output with default storage
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_sub(const std::string& symbol_name,
                           Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_sub")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Multiplies arguments element-wise.
 *
 *        The storage type of ``elemwise_mul`` output depends on storage types of inputs
 *
 *        - elemwise_mul(default, default) = default
 *        - elemwise_mul(row_sparse, row_sparse) = row_sparse
 *        - elemwise_mul(default, row_sparse) = row_sparse
 *        - elemwise_mul(row_sparse, default) = row_sparse
 *        - elemwise_mul(csr, csr) = csr
 *        - otherwise, ``elemwise_mul`` generates output with default storage
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_mul(const std::string& symbol_name,
                           Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_mul")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Divides arguments element-wise.
 *
 *        The storage type of ``elemwise_div`` output is always dense
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_div(const std::string& symbol_name,
                           Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_div")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _mod(const std::string& symbol_name,
                   Symbol lhs,
                   Symbol rhs) {
  return Operator("_mod")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*! \brief The desired storage type of the forward output given by user, if thecombination
 *        of input storage types and this hint does not matchany implemented ones, the
 *        dot operator will perform fallback operationand still produce an output of the
 */
enum class DotForwardStype {
  kNone = 0,
  kCsr = 1,
  kDefault = 2,
  kRow_sparse = 3
};

/*!
 * \brief Dot product of two arrays.
 *
 *        ``dot``'s behavior depends on the input array dimensions:
 *
 *        - 1-D arrays: inner product of vectors
 *        - 2-D arrays: matrix multiplication
 *        - N-D arrays: a sum product over the last axis of the first input and the first
 *        axis of the second input
 *
 *        For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape
 *        result array will have shape `(n,m,r,s)`. It is computed by::
 *
 *        dot(x,y)[i,j,a,b] = sum(x[i,j,:]*y[:,a,b])
 *
 *        Example::
 *
 *        x = reshape([0,1,2,3,4,5,6,7], shape=(2,2,2))
 *        y = reshape([7,6,5,4,3,2,1,0], shape=(2,2,2))
 *        dot(x,y)[0,0,1,1] = 0
 *        sum(x[0,0,:]*y[:,1,1]) = 0
 *
 *        The storage type of ``dot`` output depends on storage types of inputs,
 *        forward_stype option for output storage type. Implemented sparse operations
 *
 *        - dot(default, default, transpose_a=True/False, transpose_b=True/False) =
 *        - dot(csr, default, transpose_a=True) = default
 *        - dot(csr, default, transpose_a=True) = row_sparse
 *        - dot(csr, default) = default
 *        - dot(csr, row_sparse) = default
 *        - dot(default, csr) = csr (CPU only)
 *        - dot(default, csr, forward_stype='default') = default
 *        - dot(default, csr, transpose_b=True, forward_stype='default') = default
 *
 *        If the combination of input storage types and forward_stype does not match any
 *        above patterns, ``dot`` will fallback and generate output with default storage.
 *
 *        .. Note::
 *
 *        If the storage type of the lhs is "csr", the storage type of gradient w.r.t rhs
 *        "row_sparse". Only a subset of optimizers support sparse gradients, including
 *        and Adam. Note that by default lazy updates is turned on, which may perform
 *        from standard updates. For more details, please check the Optimization API at:
 *        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
 *
 *
 *
 *        Defined in src/operator/tensor/dot.cc:L77
 * \param symbol_name name of the resulting symbol
 * \param lhs The first input
 * \param rhs The second input
 * \param transpose_a If true then transpose the first input before dot.
 * \param transpose_b If true then transpose the second input before dot.
 * \param forward_stype The desired storage type of the forward output given by user, if
 *        thecombination of input storage types and this hint does not matchany
 *        implemented ones, the dot operator will perform fallback operationand still
 * \return new symbol
 */
inline Symbol dot(const std::string& symbol_name,
                  Symbol lhs,
                  Symbol rhs,
                  bool transpose_a = false,
                  bool transpose_b = false,
                  DotForwardStype forward_stype = DotForwardStype::kNone) {
  static const char *DotForwardStypeValues[] = {
    "None",
    "csr",
    "default",
    "row_sparse"
  };
  return Operator("dot")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("forward_stype", DotForwardStypeValues[int(forward_stype)])
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*! \brief The desired storage type of the forward output given by user, if thecombination
 *        of input storage types and this hint does not matchany implemented ones, the
 *        dot operator will perform fallback operationand still produce an output of the
 */
enum class Batch_dotForwardStype {
  kNone = 0,
  kCsr = 1,
  kDefault = 2,
  kRow_sparse = 3
};

/*!
 * \brief Batchwise dot product.
 *
 *        ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and
 *        ``y`` are data in batch, namely 3D arrays in shape of `(batch_size, :, :)`.
 *
 *        For example, given ``x`` with shape `(batch_size, n, m)` and ``y`` with shape
 *        `(batch_size, m, k)`, the result array will have shape `(batch_size, n, k)`,
 *        which is computed by::
 *
 *        batch_dot(x,y)[i,:,:] = dot(x[i,:,:], y[i,:,:])
 *
 *
 *
 *        Defined in src/operator/tensor/dot.cc:L125
 * \param symbol_name name of the resulting symbol
 * \param lhs The first input
 * \param rhs The second input
 * \param transpose_a If true then transpose the first input before dot.
 * \param transpose_b If true then transpose the second input before dot.
 * \param forward_stype The desired storage type of the forward output given by user, if
 *        thecombination of input storage types and this hint does not matchany
 *        implemented ones, the dot operator will perform fallback operationand still
 * \return new symbol
 */
inline Symbol batch_dot(const std::string& symbol_name,
                        Symbol lhs,
                        Symbol rhs,
                        bool transpose_a = false,
                        bool transpose_b = false,
                        Batch_dotForwardStype forward_stype = Batch_dotForwardStype::kNone) {
  static const char *Batch_dotForwardStypeValues[] = {
    "None",
    "csr",
    "default",
    "row_sparse"
  };
  return Operator("batch_dot")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("forward_stype", Batch_dotForwardStypeValues[int(forward_stype)])
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief fill target with zeros without default dtype
 * \param symbol_name name of the resulting symbol
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _zeros_without_dtype(const std::string& symbol_name,
                                   Shape shape = Shape(),
                                   const std::string& ctx = "",
                                   int dtype = -1) {
  return Operator("_zeros_without_dtype")
           .SetParam("shape", shape)
           .SetParam("dtype", dtype)
           .CreateSymbol(symbol_name);
}

/*! \brief Target data type.
 */
enum class _zerosDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief fill target with zeros
 * \param symbol_name name of the resulting symbol
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _zeros(const std::string& symbol_name,
                     Shape shape = {},
                     const std::string& ctx = "",
                     _zerosDtype dtype = _zerosDtype::kFloat32) {
  static const char *_zerosDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_zeros")
           .SetParam("shape", shape)
           .SetParam("dtype", _zerosDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief Target data type.
 */
enum class _eyeDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Return a 2-D array with ones on the diagonal and zeros elsewhere.
 * \param symbol_name name of the resulting symbol
 * \param N Number of rows in the output.
 * \param M Number of columns in the output. If 0, defaults to N
 * \param k Index of the diagonal. 0 (the default) refers to the main diagonal.A positive
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _eye(const std::string& symbol_name,
                   int64_t N,
                   int64_t M = 0,
                   int64_t k = 0,
                   const std::string& ctx = "",
                   _eyeDtype dtype = _eyeDtype::kFloat32) {
  static const char *_eyeDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_eye")
           .SetParam("N", N)
           .SetParam("M", M)
           .SetParam("k", k)
           .SetParam("dtype", _eyeDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief Target data type.
 */
enum class _onesDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief fill target with ones
 * \param symbol_name name of the resulting symbol
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _ones(const std::string& symbol_name,
                    Shape shape = {},
                    const std::string& ctx = "",
                    _onesDtype dtype = _onesDtype::kFloat32) {
  static const char *_onesDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_ones")
           .SetParam("shape", shape)
           .SetParam("dtype", _onesDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief Target data type.
 */
enum class _fullDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief fill target with a scalar value
 * \param symbol_name name of the resulting symbol
 * \param value Value with which to fill newly created tensor
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _full(const std::string& symbol_name,
                    double value,
                    Shape shape = Shape(),
                    const std::string& ctx = "",
                    _fullDtype dtype = _fullDtype::kFloat32) {
  static const char *_fullDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_full")
           .SetParam("value", value)
           .SetParam("shape", shape)
           .SetParam("dtype", _fullDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief Target data type.
 */
enum class _arangeDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Return evenly spaced values within a given interval. Similar to Numpy
 * \param symbol_name name of the resulting symbol
 * \param start Start of interval. The interval includes this value. The default start
 * \param stop End of interval. The interval does not include this value, except in some
 *        cases where step is not an integer and floating point round-off affects the
 * \param step Spacing between values.
 * \param repeat The repeating time of all elements. E.g repeat=3, the element a will be
 * \param infer_range When set to True, infer the stop position from the start, step,
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _arange(const std::string& symbol_name,
                      double start,
                      dmlc::optional<double> stop = dmlc::optional<double>(),
                      double step = 1,
                      int repeat = 1,
                      bool infer_range = false,
                      const std::string& ctx = "",
                      _arangeDtype dtype = _arangeDtype::kFloat32) {
  static const char *_arangeDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_arange")
           .SetParam("start", start)
           .SetParam("stop", stop)
           .SetParam("step", step)
           .SetParam("repeat", repeat)
           .SetParam("infer_range", infer_range)
           .SetParam("dtype", _arangeDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Return an array with evenly spaced values. If axis is not given, the output will
 *        have the same shape as the input array. Otherwise, the output will be a 1-D
 *        the specified axis in input shape.
 *
 *        Examples::
 *
 *        x = [[0.14883883 0.7772398  0.94865847 0.7225052 ]
 *        [0.23729339 0.6112595  0.66538996 0.5132841 ]
 *        [0.30822644 0.9912457  0.15502319 0.7043658 ]]
 *        <NDArray 3x4 @cpu(0)>
 *
 *        out = mx.nd.contrib.arange_like(x, start=0)
 *
 *        [[ 0.  1.  2.  3.]
 *        [ 4.  5.  6.  7.]
 *        [ 8.  9. 10. 11.]]
 *        <NDArray 3x4 @cpu(0)>
 *
 *        out = mx.nd.contrib.arange_like(x, start=0, axis=-1)
 *
 *        [0. 1. 2. 3.]
 *        <NDArray 4 @cpu(0)>
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \return new symbol
 */
inline Symbol _contrib_arange_like(const std::string& symbol_name,
                                   Symbol data) {
  return Operator("_contrib_arange_like")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Target data type.
 */
enum class _linspaceDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Return evenly spaced numbers over a specified interval. Similar to Numpy
 * \param symbol_name name of the resulting symbol
 * \param start Start of interval. The interval includes this value. The default start
 * \param stop End of interval. The interval does not include this value, except in some
 *        cases where step is not an integer and floating point round-off affects the
 * \param step Spacing between values.
 * \param repeat The repeating time of all elements. E.g repeat=3, the element a will be
 * \param infer_range When set to True, infer the stop position from the start, step,
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _linspace(const std::string& symbol_name,
                        double start,
                        dmlc::optional<double> stop = dmlc::optional<double>(),
                        double step = 1,
                        int repeat = 1,
                        bool infer_range = false,
                        const std::string& ctx = "",
                        _linspaceDtype dtype = _linspaceDtype::kFloat32) {
  static const char *_linspaceDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_linspace")
           .SetParam("start", start)
           .SetParam("stop", stop)
           .SetParam("step", step)
           .SetParam("repeat", repeat)
           .SetParam("infer_range", infer_range)
           .SetParam("dtype", _linspaceDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Return an array of zeros with the same shape, type and storage type
 *        as the input array.
 *
 *        The storage type of ``zeros_like`` output depends on the storage type of the
 *
 *        - zeros_like(row_sparse) = row_sparse
 *        - zeros_like(csr) = csr
 *        - zeros_like(default) = default
 *
 *        Examples::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        zeros_like(x) = [[ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \return new symbol
 */
inline Symbol zeros_like(const std::string& symbol_name,
                         Symbol data) {
  return Operator("zeros_like")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Return an array of ones with the same shape and type
 *        as the input array.
 *
 *        Examples::
 *
 *        x = [[ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *        ones_like(x) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \return new symbol
 */
inline Symbol ones_like(const std::string& symbol_name,
                        Symbol data) {
  return Operator("ones_like")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Adds all input arguments element-wise.
 *
 *        .. math::
 *        add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n
 *
 *        ``add_n`` is potentially more efficient than calling ``add`` by `n` times.
 *
 *        The storage type of ``add_n`` output depends on storage types of inputs
 *
 *        - add_n(row_sparse, row_sparse, ..) = row_sparse
 *        - add_n(default, csr, default) = default
 *        - add_n(any input combinations longer than 4 (>4) with at least one default
 *        - otherwise, ``add_n`` falls all inputs back to default storage and generates
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_sum.cc:L155
 * \param symbol_name name of the resulting symbol
 * \param args Positional input arguments
 * \return new symbol
 */
inline Symbol add_n(const std::string& symbol_name,
                    const std::vector<Symbol>& args) {
  return Operator("add_n")
(args)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs general matrix multiplication and accumulation.
 *        Input are tensors *A*, *B*, *C*, each of dimension *n >= 2* and having the same
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, the BLAS3 function *gemm* is performed:
 *
 *        *out* = *alpha* \* *op*\ (*A*) \* *op*\ (*B*) + *beta* \* *C*
 *
 *        Here, *alpha* and *beta* are scalar parameters, and *op()* is either the
 *        matrix transposition (depending on *transpose_a*, *transpose_b*).
 *
 *        If *n>2*, *gemm* is performed separately for a batch of matrices. The column
 *        are given by the last dimensions of the tensors, the row indices by the axis
 *        parameter. By default, the trailing two dimensions will be used for matrix
 *
 *        For a non-default axis parameter, the operation performed is equivalent to a
 *        calls. For example let *A*, *B*, *C* be 5 dimensional tensors. Then gemm(*A*,
 *        to the following without the overhead of the additional swapaxis operations::
 *
 *        A1 = swapaxes(A, dim1=1, dim2=3)
 *        B1 = swapaxes(B, dim1=1, dim2=3)
 *        C = swapaxes(C, dim1=1, dim2=3)
 *        C = gemm(A1, B1, C)
 *        C = swapaxis(C, dim1=1, dim2=3)
 *
 *        When the input data is of type float32 and the environment variables
 *        and MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION are set to 1, this operator will
 *        pseudo-float16 precision (float32 math with float16 I/O) precision in order to
 *        Tensor Cores on suitable NVIDIA GPUs. This can sometimes give significant
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix multiply-add
 *        A = [[1.0, 1.0], [1.0, 1.0]]
 *        B = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
 *        C = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
 *        gemm(A, B, C, transpose_b=True, alpha=2.0, beta=10.0)
 *        = [[14.0, 14.0, 14.0], [14.0, 14.0, 14.0]]
 *
 *        // Batch matrix multiply-add
 *        A = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        B = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        C = [[[10.0]], [[0.01]]]
 *        gemm(A, B, C, transpose_b=True, alpha=2.0 , beta=10.0)
 *        = [[[104.0]], [[0.14]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L89
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of input matrices
 * \param B Tensor of input matrices
 * \param C Tensor of input matrices
 * \param transpose_a Multiply with transposed of first input (A).
 * \param transpose_b Multiply with transposed of second input (B).
 * \param alpha Scalar factor multiplied with A*B.
 * \param beta Scalar factor multiplied with C.
 * \param axis Axis corresponding to the matrix rows.
 * \return new symbol
 */
inline Symbol _linalg_gemm(const std::string& symbol_name,
                           Symbol A,
                           Symbol B,
                           Symbol C,
                           bool transpose_a = false,
                           bool transpose_b = false,
                           double alpha = 1,
                           double beta = 1,
                           int axis = -2) {
  return Operator("_linalg_gemm")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetParam("axis", axis)
           .SetInput("A", A)
           .SetInput("B", B)
           .SetInput("C", C)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs general matrix multiplication.
 *        Input are tensors *A*, *B*, each of dimension *n >= 2* and having the same shape
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, the BLAS3 function *gemm* is performed:
 *
 *        *out* = *alpha* \* *op*\ (*A*) \* *op*\ (*B*)
 *
 *        Here *alpha* is a scalar parameter and *op()* is either the identity or the
 *        transposition (depending on *transpose_a*, *transpose_b*).
 *
 *        If *n>2*, *gemm* is performed separately for a batch of matrices. The column
 *        are given by the last dimensions of the tensors, the row indices by the axis
 *        parameter. By default, the trailing two dimensions will be used for matrix
 *
 *        For a non-default axis parameter, the operation performed is equivalent to a
 *        calls. For example let *A*, *B* be 5 dimensional tensors. Then gemm(*A*, *B*,
 *        the following without the overhead of the additional swapaxis operations::
 *
 *        A1 = swapaxes(A, dim1=1, dim2=3)
 *        B1 = swapaxes(B, dim1=1, dim2=3)
 *        C = gemm2(A1, B1)
 *        C = swapaxis(C, dim1=1, dim2=3)
 *
 *        When the input data is of type float32 and the environment variables
 *        and MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION are set to 1, this operator will
 *        pseudo-float16 precision (float32 math with float16 I/O) precision in order to
 *        Tensor Cores on suitable NVIDIA GPUs. This can sometimes give significant
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix multiply
 *        A = [[1.0, 1.0], [1.0, 1.0]]
 *        B = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
 *        gemm2(A, B, transpose_b=True, alpha=2.0)
 *        = [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]]
 *
 *        // Batch matrix multiply
 *        A = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        B = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        gemm2(A, B, transpose_b=True, alpha=2.0)
 *        = [[[4.0]], [[0.04 ]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L163
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of input matrices
 * \param B Tensor of input matrices
 * \param transpose_a Multiply with transposed of first input (A).
 * \param transpose_b Multiply with transposed of second input (B).
 * \param alpha Scalar factor multiplied with A*B.
 * \param axis Axis corresponding to the matrix row indices.
 * \return new symbol
 */
inline Symbol _linalg_gemm2(const std::string& symbol_name,
                            Symbol A,
                            Symbol B,
                            bool transpose_a = false,
                            bool transpose_b = false,
                            double alpha = 1,
                            int axis = -2) {
  return Operator("_linalg_gemm2")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("alpha", alpha)
           .SetParam("axis", axis)
           .SetInput("A", A)
           .SetInput("B", B)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs Cholesky factorization of a symmetric positive-definite matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, the Cholesky factor *B* of the symmetric, positive definite matrix
 *        computed. *B* is triangular (entries of upper or lower triangle are all zero),
 *        positive diagonal entries, and:
 *
 *        *A* = *B* \* *B*\ :sup:`T`  if *lower* = *true*
 *        *A* = *B*\ :sup:`T` \* *B*  if *lower* = *false*
 *
 *        If *n>2*, *potrf* is performed separately on the trailing two dimensions for
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix factorization
 *        A = [[4.0, 1.0], [1.0, 4.25]]
 *        potrf(A) = [[2.0, 0], [0.5, 2.0]]
 *
 *        // Batch matrix factorization
 *        A = [[[4.0, 1.0], [1.0, 4.25]], [[16.0, 4.0], [4.0, 17.0]]]
 *        potrf(A) = [[[2.0, 0], [0.5, 2.0]], [[4.0, 0], [1.0, 4.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L214
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of input matrices to be decomposed
 * \return new symbol
 */
inline Symbol _linalg_potrf(const std::string& symbol_name,
                            Symbol A) {
  return Operator("_linalg_potrf")
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs matrix inversion from a Cholesky factorization.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* is a triangular matrix (entries of upper or lower triangle are
 *        with positive diagonal. We compute:
 *
 *        *out* = *A*\ :sup:`-T` \* *A*\ :sup:`-1` if *lower* = *true*
 *        *out* = *A*\ :sup:`-1` \* *A*\ :sup:`-T` if *lower* = *false*
 *
 *        In other words, if *A* is the Cholesky factor of a symmetric positive definite
 *        *B* (obtained by *potrf*), then
 *
 *        *out* = *B*\ :sup:`-1`
 *
 *        If *n>2*, *potri* is performed separately on the trailing two dimensions for
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        .. note:: Use this operator only if you are certain you need the inverse of
 *        cannot use the Cholesky factor *A* (*potrf*), together with backsubstitution
 *        (*trsm*). The latter is numerically much safer, and also cheaper.
 *
 *        Examples::
 *
 *        // Single matrix inverse
 *        A = [[2.0, 0], [0.5, 2.0]]
 *        potri(A) = [[0.26563, -0.0625], [-0.0625, 0.25]]
 *
 *        // Batch matrix inverse
 *        A = [[[2.0, 0], [0.5, 2.0]], [[4.0, 0], [1.0, 4.0]]]
 *        potri(A) = [[[0.26563, -0.0625], [-0.0625, 0.25]],
 *        [[0.06641, -0.01562], [-0.01562, 0,0625]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L275
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of lower triangular matrices
 * \return new symbol
 */
inline Symbol _linalg_potri(const std::string& symbol_name,
                            Symbol A) {
  return Operator("_linalg_potri")
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs multiplication with a lower triangular matrix.
 *        Input are tensors *A*, *B*, each of dimension *n >= 2* and having the same shape
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, *A* must be triangular. The operator performs the BLAS3 function
 *        *trmm*:
 *
 *        *out* = *alpha* \* *op*\ (*A*) \* *B*
 *
 *        if *rightside=False*, or
 *
 *        *out* = *alpha* \* *B* \* *op*\ (*A*)
 *
 *        if *rightside=True*. Here, *alpha* is a scalar parameter, and *op()* is either
 *        identity or the matrix transposition (depending on *transpose*).
 *
 *        If *n>2*, *trmm* is performed separately on the trailing two dimensions for all
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single triangular matrix multiply
 *        A = [[1.0, 0], [1.0, 1.0]]
 *        B = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
 *        trmm(A, B, alpha=2.0) = [[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]]
 *
 *        // Batch triangular matrix multiply
 *        A = [[[1.0, 0], [1.0, 1.0]], [[1.0, 0], [1.0, 1.0]]]
 *        B = [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]]
 *        trmm(A, B, alpha=2.0) = [[[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]],
 *        [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L333
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of lower triangular matrices
 * \param B Tensor of matrices
 * \param transpose Use transposed of the triangular matrix
 * \param rightside Multiply triangular matrix from the right to non-triangular one.
 * \param lower True if the triangular matrix is lower triangular, false if it is upper
 * \param alpha Scalar factor to be applied to the result.
 * \return new symbol
 */
inline Symbol _linalg_trmm(const std::string& symbol_name,
                           Symbol A,
                           Symbol B,
                           bool transpose = false,
                           bool rightside = false,
                           bool lower = true,
                           double alpha = 1) {
  return Operator("_linalg_trmm")
           .SetParam("transpose", transpose)
           .SetParam("rightside", rightside)
           .SetParam("lower", lower)
           .SetParam("alpha", alpha)
           .SetInput("A", A)
           .SetInput("B", B)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Solves matrix equation involving a lower triangular matrix.
 *        Input are tensors *A*, *B*, each of dimension *n >= 2* and having the same shape
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, *A* must be triangular. The operator performs the BLAS3 function
 *        *trsm*, solving for *out* in:
 *
 *        *op*\ (*A*) \* *out* = *alpha* \* *B*
 *
 *        if *rightside=False*, or
 *
 *        *out* \* *op*\ (*A*) = *alpha* \* *B*
 *
 *        if *rightside=True*. Here, *alpha* is a scalar parameter, and *op()* is either
 *        identity or the matrix transposition (depending on *transpose*).
 *
 *        If *n>2*, *trsm* is performed separately on the trailing two dimensions for all
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix solve
 *        A = [[1.0, 0], [1.0, 1.0]]
 *        B = [[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]]
 *        trsm(A, B, alpha=0.5) = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
 *
 *        // Batch matrix solve
 *        A = [[[1.0, 0], [1.0, 1.0]], [[1.0, 0], [1.0, 1.0]]]
 *        B = [[[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]],
 *        [[4.0, 4.0, 4.0], [8.0, 8.0, 8.0]]]
 *        trsm(A, B, alpha=0.5) = [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
 *        [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L396
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of lower triangular matrices
 * \param B Tensor of matrices
 * \param transpose Use transposed of the triangular matrix
 * \param rightside Multiply triangular matrix from the right to non-triangular one.
 * \param lower True if the triangular matrix is lower triangular, false if it is upper
 * \param alpha Scalar factor to be applied to the result.
 * \return new symbol
 */
inline Symbol _linalg_trsm(const std::string& symbol_name,
                           Symbol A,
                           Symbol B,
                           bool transpose = false,
                           bool rightside = false,
                           bool lower = true,
                           double alpha = 1) {
  return Operator("_linalg_trsm")
           .SetParam("transpose", transpose)
           .SetParam("rightside", rightside)
           .SetParam("lower", lower)
           .SetParam("alpha", alpha)
           .SetInput("A", A)
           .SetInput("B", B)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the sum of the logarithms of the diagonal elements of a square matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* must be square with positive diagonal entries. We sum the natural
 *        logarithms of the diagonal elements, the result has shape (1,).
 *
 *        If *n>2*, *sumlogdiag* is performed separately on the trailing two dimensions
 *        inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix reduction
 *        A = [[1.0, 1.0], [1.0, 7.0]]
 *        sumlogdiag(A) = [1.9459]
 *
 *        // Batch matrix reduction
 *        A = [[[1.0, 1.0], [1.0, 7.0]], [[3.0, 0], [0, 17.0]]]
 *        sumlogdiag(A) = [1.9459, 3.9318]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L445
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of square matrices
 * \return new symbol
 */
inline Symbol _linalg_sumlogdiag(const std::string& symbol_name,
                                 Symbol A) {
  return Operator("_linalg_sumlogdiag")
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Extracts the diagonal entries of a square matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, then *A* represents a single square matrix which diagonal elements
 *
 *        If *n>2*, then *A* represents a batch of square matrices on the trailing two
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix diagonal extraction
 *        A = [[1.0, 2.0],
 *        [3.0, 4.0]]
 *
 *        extractdiag(A) = [1.0, 4.0]
 *
 *        extractdiag(A, 1) = [2.0]
 *
 *        // Batch matrix diagonal extraction
 *        A = [[[1.0, 2.0],
 *        [3.0, 4.0]],
 *        [[5.0, 6.0],
 *        [7.0, 8.0]]]
 *
 *        extractdiag(A) = [[1.0, 4.0],
 *        [5.0, 8.0]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L495
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of square matrices
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \return new symbol
 */
inline Symbol _linalg_extractdiag(const std::string& symbol_name,
                                  Symbol A,
                                  int offset = 0) {
  return Operator("_linalg_extractdiag")
           .SetParam("offset", offset)
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Constructs a square matrix with the input as diagonal.
 *        Input is a tensor *A* of dimension *n >= 1*.
 *
 *        If *n=1*, then *A* represents the diagonal entries of a single square matrix.
 *        If *n>1*, then *A* represents a batch of diagonals of square matrices. The
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single diagonal matrix construction
 *        A = [1.0, 2.0]
 *
 *        makediag(A)    = [[1.0, 0.0],
 *        [0.0, 2.0]]
 *
 *        makediag(A, 1) = [[0.0, 1.0, 0.0],
 *        [0.0, 0.0, 2.0],
 *        [0.0, 0.0, 0.0]]
 *
 *        // Batch diagonal matrix construction
 *        A = [[1.0, 2.0],
 *        [3.0, 4.0]]
 *
 *        makediag(A) = [[[1.0, 0.0],
 *        [0.0, 2.0]],
 *        [[3.0, 0.0],
 *        [0.0, 4.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L547
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of diagonal entries
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \return new symbol
 */
inline Symbol _linalg_makediag(const std::string& symbol_name,
                               Symbol A,
                               int offset = 0) {
  return Operator("_linalg_makediag")
           .SetParam("offset", offset)
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Extracts a triangular sub-matrix from a square matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, then *A* represents a single square matrix from which a triangular
 *
 *        If *n>2*, then *A* represents a batch of square matrices on the trailing two
 *        dimensions. The extracted triangular sub-matrices are returned as an
 *
 *        The *offset* and *lower* parameters determine the triangle to be extracted:
 *
 *        - When *offset = 0* either the lower or upper triangle with respect to the main
 *        - When *offset = k > 0* the upper triangle with respect to the k-th diagonal
 *        - When *offset = k < 0* the lower triangle with respect to the k-th diagonal
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single triagonal extraction
 *        A = [[1.0, 2.0],
 *        [3.0, 4.0]]
 *
 *        extracttrian(A) = [1.0, 3.0, 4.0]
 *        extracttrian(A, lower=False) = [1.0, 2.0, 4.0]
 *        extracttrian(A, 1) = [2.0]
 *        extracttrian(A, -1) = [3.0]
 *
 *        // Batch triagonal extraction
 *        A = [[[1.0, 2.0],
 *        [3.0, 4.0]],
 *        [[5.0, 6.0],
 *        [7.0, 8.0]]]
 *
 *        extracttrian(A) = [[1.0, 3.0, 4.0],
 *        [5.0, 7.0, 8.0]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L605
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of square matrices
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \param lower Refer to the lower triangular matrix if lower=true, refer to the upper
 * \return new symbol
 */
inline Symbol _linalg_extracttrian(const std::string& symbol_name,
                                   Symbol A,
                                   int offset = 0,
                                   bool lower = true) {
  return Operator("_linalg_extracttrian")
           .SetParam("offset", offset)
           .SetParam("lower", lower)
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Constructs a square matrix with the input representing a specific triangular
 *        This is basically the inverse of *linalg.extracttrian*. Input is a tensor *A*
 *
 *        If *n=1*, then *A* represents the entries of a triangular matrix which is lower
 *        triangular if *offset<0* or *offset=0*, *lower=true*. The resulting matrix is
 *        matrix with the entries outside the triangle set to zero and then adding
 *        diagonal with zero entries to the square matrix.
 *
 *        If *n>1*, then *A* represents a batch of triangular sub-matrices. The batch of
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single  matrix construction
 *        A = [1.0, 2.0, 3.0]
 *
 *        maketrian(A)              = [[1.0, 0.0],
 *        [2.0, 3.0]]
 *
 *        maketrian(A, lower=false) = [[1.0, 2.0],
 *        [0.0, 3.0]]
 *
 *        maketrian(A, offset=1)    = [[0.0, 1.0, 2.0],
 *        [0.0, 0.0, 3.0],
 *        [0.0, 0.0, 0.0]]
 *        maketrian(A, offset=-1)   = [[0.0, 0.0, 0.0],
 *        [1.0, 0.0, 0.0],
 *        [2.0, 3.0, 0.0]]
 *
 *        // Batch matrix construction
 *        A = [[1.0, 2.0, 3.0],
 *        [4.0, 5.0, 6.0]]
 *
 *        maketrian(A)           = [[[1.0, 0.0],
 *        [2.0, 3.0]],
 *        [[4.0, 0.0],
 *        [5.0, 6.0]]]
 *
 *        maketrian(A, offset=1) = [[[0.0, 1.0, 2.0],
 *        [0.0, 0.0, 3.0],
 *        [0.0, 0.0, 0.0]],
 *        [[0.0, 4.0, 5.0],
 *        [0.0, 0.0, 6.0],
 *        [0.0, 0.0, 0.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L673
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of triangular matrices stored as vectors
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \param lower Refer to the lower triangular matrix if lower=true, refer to the upper
 * \return new symbol
 */
inline Symbol _linalg_maketrian(const std::string& symbol_name,
                                Symbol A,
                                int offset = 0,
                                bool lower = true) {
  return Operator("_linalg_maketrian")
           .SetParam("offset", offset)
           .SetParam("lower", lower)
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Multiplication of matrix with its transpose.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, the operator performs the BLAS3 function *syrk*:
 *
 *        *out* = *alpha* \* *A* \* *A*\ :sup:`T`
 *
 *        if *transpose=False*, or
 *
 *        *out* = *alpha* \* *A*\ :sup:`T` \ \* *A*
 *
 *        if *transpose=True*.
 *
 *        If *n>2*, *syrk* is performed separately on the trailing two dimensions for all
 *        inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix multiply
 *        A = [[1., 2., 3.], [4., 5., 6.]]
 *        syrk(A, alpha=1., transpose=False)
 *        = [[14., 32.],
 *        [32., 77.]]
 *        syrk(A, alpha=1., transpose=True)
 *        = [[17., 22., 27.],
 *        [22., 29., 36.],
 *        [27., 36., 45.]]
 *
 *        // Batch matrix multiply
 *        A = [[[1., 1.]], [[0.1, 0.1]]]
 *        syrk(A, alpha=2., transpose=False) = [[[4.]], [[0.04]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L730
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of input matrices
 * \param transpose Use transpose of input matrix.
 * \param alpha Scalar factor to be applied to the result.
 * \return new symbol
 */
inline Symbol _linalg_syrk(const std::string& symbol_name,
                           Symbol A,
                           bool transpose = false,
                           double alpha = 1) {
  return Operator("_linalg_syrk")
           .SetParam("transpose", transpose)
           .SetParam("alpha", alpha)
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief LQ factorization for general matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, we compute the LQ factorization (LAPACK *gelqf*, followed by
 *        must have shape *(x, y)* with *x <= y*, and must have full rank *=x*. The LQ
 *        factorization consists of *L* with shape *(x, x)* and *Q* with shape *(x, y)*,
 *        that:
 *
 *        *A* = *L* \* *Q*
 *
 *        Here, *L* is lower triangular (upper triangle equal to zero) with nonzero
 *        and *Q* is row-orthonormal, meaning that
 *
 *        *Q* \* *Q*\ :sup:`T`
 *
 *        is equal to the identity matrix of shape *(x, x)*.
 *
 *        If *n>2*, *gelqf* is performed separately on the trailing two dimensions for all
 *        inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single LQ factorization
 *        A = [[1., 2., 3.], [4., 5., 6.]]
 *        Q, L = gelqf(A)
 *        Q = [[-0.26726124, -0.53452248, -0.80178373],
 *        [0.87287156, 0.21821789, -0.43643578]]
 *        L = [[-3.74165739, 0.],
 *        [-8.55235974, 1.96396101]]
 *
 *        // Batch LQ factorization
 *        A = [[[1., 2., 3.], [4., 5., 6.]],
 *        [[7., 8., 9.], [10., 11., 12.]]]
 *        Q, L = gelqf(A)
 *        Q = [[[-0.26726124, -0.53452248, -0.80178373],
 *        [0.87287156, 0.21821789, -0.43643578]],
 *        [[-0.50257071, -0.57436653, -0.64616234],
 *        [0.7620735, 0.05862104, -0.64483142]]]
 *        L = [[[-3.74165739, 0.],
 *        [-8.55235974, 1.96396101]],
 *        [[-13.92838828, 0.],
 *        [-19.09768702, 0.52758934]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L798
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of input matrices to be factorized
 * \return new symbol
 */
inline Symbol _linalg_gelqf(const std::string& symbol_name,
                            Symbol A) {
  return Operator("_linalg_gelqf")
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Eigendecomposition for symmetric matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* must be symmetric, of shape *(x, x)*. We compute the
 *        resulting in the orthonormal matrix *U* of eigenvectors, shape *(x, x)*, and the
 *        vector *L* of eigenvalues, shape *(x,)*, so that:
 *
 *        *U* \* *A* = *diag(L)* \* *U*
 *
 *        Here:
 *
 *        *U* \* *U*\ :sup:`T` = *U*\ :sup:`T` \* *U* = *I*
 *
 *        where *I* is the identity matrix. Also, *L(0) <= L(1) <= L(2) <= ...*
 *
 *        If *n>2*, *syevd* is performed separately on the trailing two dimensions of *A*
 *        mode). In this case, *U* has *n* dimensions like *A*, and *L* has *n-1*
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        .. note:: Derivatives for this operator are defined only if *A* is such that
 *        eigenvalues are distinct, and the eigengaps are not too small. If you need
 *        gradients, do not apply this operator to matrices with multiple eigenvalues.
 *
 *        Examples::
 *
 *        // Single symmetric eigendecomposition
 *        A = [[1., 2.], [2., 4.]]
 *        U, L = syevd(A)
 *        U = [[0.89442719, -0.4472136],
 *        [0.4472136, 0.89442719]]
 *        L = [0., 5.]
 *
 *        // Batch symmetric eigendecomposition
 *        A = [[[1., 2.], [2., 4.]],
 *        [[1., 2.], [2., 5.]]]
 *        U, L = syevd(A)
 *        U = [[[0.89442719, -0.4472136],
 *        [0.4472136, 0.89442719]],
 *        [[0.92387953, -0.38268343],
 *        [0.38268343, 0.92387953]]]
 *        L = [[0., 5.],
 *        [0.17157288, 5.82842712]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L867
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of input matrices to be factorized
 * \return new symbol
 */
inline Symbol _linalg_syevd(const std::string& symbol_name,
                            Symbol A) {
  return Operator("_linalg_syevd")
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Compute the inverse of a matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* is a square matrix. We compute:
 *
 *        *out* = *A*\ :sup:`-1`
 *
 *        If *n>2*, *inverse* is performed separately on the trailing two dimensions
 *        for all inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix inversion
 *        A = [[1., 4.], [2., 3.]]
 *        inverse(A) = [[-0.6, 0.8], [0.4, -0.2]]
 *
 *        // Batch matrix inversion
 *        A = [[[1., 4.], [2., 3.]],
 *        [[1., 3.], [2., 4.]]]
 *        inverse(A) = [[[-0.6, 0.8], [0.4, -0.2]],
 *        [[-2., 1.5], [1., -0.5]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L917
 * \param symbol_name name of the resulting symbol
 * \param A Tensor of square matrix
 * \return new symbol
 */
inline Symbol _linalg_inverse(const std::string& symbol_name,
                              Symbol A) {
  return Operator("_linalg_inverse")
           .SetInput("A", A)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operators implements the histogram function.
 *
 *        Example::
 *        x = [[0, 1], [2, 2], [3, 4]]
 *        histo, bin_edges = histogram(data=x, bin_bounds=[], bin_cnt=5, range=(0,5))
 *        histo = [1, 1, 2, 1, 1]
 *        bin_edges = [0., 1., 2., 3., 4.]
 *        histo, bin_edges = histogram(data=x, bin_bounds=[0., 2.1, 3.])
 *        histo = [4, 1]
 *
 *
 *
 *        Defined in src/operator/tensor/histogram.cc:L136
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param bins Input ndarray
 * \param bin_cnt Number of bins for uniform case
 * \param range The lower and upper range of the bins. if not provided, range is simply
 *        (a.min(), a.max()). values outside the range are ignored. the first element of
 *        the range must be less than or equal to the second. range affects the automatic
 *        bin computation as well. while bin width is computed to be optimal based on the
 *        actual data within range, the bin count will fill the entire range including
 * \return new symbol
 */
inline Symbol _histogram(const std::string& symbol_name,
                         Symbol data,
                         Symbol bins,
                         dmlc::optional<int> bin_cnt = dmlc::optional<int>(),
                         int64_t range = int64_t()) {
  return Operator("_histogram")
           .SetParam("bin_cnt", bin_cnt)
           .SetParam("range", range)
           .SetInput("data", data)
           .SetInput("bins", bins)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns indices of the maximum values along an axis.
 *
 *        In the case of multiple occurrences of maximum values, the indices
 *        are returned.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.],
 *        [ 3.,  4.,  5.]]
 *
 *        // argmax along axis 0
 *        argmax(x, axis=0) = [ 1.,  1.,  1.]
 *
 *        // argmax along axis 1
 *        argmax(x, axis=1) = [ 2.,  2.]
 *
 *        // argmax along axis 1 keeping same dims as an input array
 *        argmax(x, axis=1, keepdims=True) = [[ 2.],
 *        [ 2.]]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L52
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis along which to perform the reduction. Negative values means
 *        indexing from right to left. ``Requires axis to be set as int, because global
 * \param keepdims If this is set to `True`, the reduced axis is left in the result as
 * \return new symbol
 */
inline Symbol argmax(const std::string& symbol_name,
                     Symbol data,
                     dmlc::optional<int> axis = dmlc::optional<int>(),
                     bool keepdims = false) {
  return Operator("argmax")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns indices of the minimum values along an axis.
 *
 *        In the case of multiple occurrences of minimum values, the indices
 *        are returned.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.],
 *        [ 3.,  4.,  5.]]
 *
 *        // argmin along axis 0
 *        argmin(x, axis=0) = [ 0.,  0.,  0.]
 *
 *        // argmin along axis 1
 *        argmin(x, axis=1) = [ 0.,  0.]
 *
 *        // argmin along axis 1 keeping same dims as an input array
 *        argmin(x, axis=1, keepdims=True) = [[ 0.],
 *        [ 0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L77
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis along which to perform the reduction. Negative values means
 *        indexing from right to left. ``Requires axis to be set as int, because global
 * \param keepdims If this is set to `True`, the reduced axis is left in the result as
 * \return new symbol
 */
inline Symbol argmin(const std::string& symbol_name,
                     Symbol data,
                     dmlc::optional<int> axis = dmlc::optional<int>(),
                     bool keepdims = false) {
  return Operator("argmin")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns argmax indices of each channel from the input array.
 *
 *        The result will be an NDArray of shape (num_channel,).
 *
 *        In case of multiple occurrences of the maximum values, the indices
 *        are returned.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.],
 *        [ 3.,  4.,  5.]]
 *
 *        argmax_channel(x) = [ 2.,  2.]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L97
 * \param symbol_name name of the resulting symbol
 * \param data The input array
 * \return new symbol
 */
inline Symbol argmax_channel(const std::string& symbol_name,
                             Symbol data) {
  return Operator("argmax_channel")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Specify how out-of-bound indices behave. Default is "clip". "clip" means clip
 *        to the range. So, if all indices mentioned are too large, they are replaced by
 *        the index that addresses the last element along an axis.  "wrap" means to wrap
 */
enum class PickMode {
  kClip = 0,
  kWrap = 1
};

/*!
 * \brief Picks elements from an input array according to the input indices along the
 *
 *        Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the
 *        an output array of shape ``(i0,)`` with::
 *
 *        output[i] = input[i, indices[i]]
 *
 *        By default, if any index mentioned is too large, it is replaced by the index
 *        the last element along an axis (the `clip` mode).
 *
 *        This function supports n-dimensional input and (n-1)-dimensional indices arrays.
 *
 *        Examples::
 *
 *        x = [[ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 5.,  6.]]
 *
 *        // picks elements with specified indices along axis 0
 *        pick(x, y=[0,1], 0) = [ 1.,  4.]
 *
 *        // picks elements with specified indices along axis 1
 *        pick(x, y=[0,1,0], 1) = [ 1.,  4.,  5.]
 *
 *        y = [[ 1.],
 *        [ 0.],
 *        [ 2.]]
 *
 *        // picks elements with specified indices along axis 1 using 'wrap' mode
 *        // to place indicies that would normally be out of bounds
 *        pick(x, y=[2,-1,-2], 1, mode='wrap') = [ 1.,  4.,  5.]
 *
 *        y = [[ 1.],
 *        [ 0.],
 *        [ 2.]]
 *
 *        // picks elements with specified indices along axis 1 and dims are maintained
 *        pick(x,y, 1, keepdims=True) = [[ 2.],
 *        [ 3.],
 *        [ 6.]]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L154
 * \param symbol_name name of the resulting symbol
 * \param data The input array
 * \param index The index array
 * \param axis int or None. The axis to picking the elements. Negative values means
 *        indexing from right to left. If is `None`, the elements in the index w.r.t the
 * \param keepdims If true, the axis where we pick the elements is left in the result as
 * \param mode Specify how out-of-bound indices behave. Default is "clip". "clip" means
 *        clip to the range. So, if all indices mentioned are too large, they are
 *        replaced by the index that addresses the last element along an axis.  "wrap"
 * \return new symbol
 */
inline Symbol pick(const std::string& symbol_name,
                   Symbol data,
                   Symbol index,
                   dmlc::optional<int> axis = dmlc::optional<int>(-1),
                   bool keepdims = false,
                   PickMode mode = PickMode::kClip) {
  static const char *PickModeValues[] = {
    "clip",
    "wrap"
  };
  return Operator("pick")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("mode", PickModeValues[int(mode)])
           .SetInput("data", data)
           .SetInput("index", index)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Divides arguments element-wise.  If the left-hand-side input is 'row_sparse',
 *        only the values which exist in the left-hand sparse array are computed.  The
 *        are ignored.
 *
 *        The storage type of ``_scatter_elemwise_div`` output depends on storage types
 *
 *        - _scatter_elemwise_div(row_sparse, row_sparse) = row_sparse
 *        - _scatter_elemwise_div(row_sparse, dense) = row_sparse
 *        - _scatter_elemwise_div(row_sparse, csr) = row_sparse
 *        - otherwise, ``_scatter_elemwise_div`` behaves exactly like elemwise_div and
 *        with default storage
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _scatter_elemwise_div(const std::string& symbol_name,
                                    Symbol lhs,
                                    Symbol rhs) {
  return Operator("_scatter_elemwise_div")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Adds a scalar to a tensor element-wise.  If the left-hand-side input is
 *        'row_sparse' or 'csr', then only the values which exist in the left-hand sparse
 *        The 'missing' values are ignored.
 *
 *        The storage type of ``_scatter_plus_scalar`` output depends on storage types of
 *
 *        - _scatter_plus_scalar(row_sparse, scalar) = row_sparse
 *        - _scatter_plus_scalar(csr, scalar) = csr
 *        - otherwise, ``_scatter_plus_scalar`` behaves exactly like _plus_scalar and
 *        with default storage
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _scatter_plus_scalar(const std::string& symbol_name,
                                   Symbol data,
                                   mx_float scalar) {
  return Operator("_scatter_plus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Subtracts a scalar to a tensor element-wise.  If the left-hand-side input is
 *        'row_sparse' or 'csr', then only the values which exist in the left-hand sparse
 *        The 'missing' values are ignored.
 *
 *        The storage type of ``_scatter_minus_scalar`` output depends on storage types
 *
 *        - _scatter_minus_scalar(row_sparse, scalar) = row_sparse
 *        - _scatter_minus_scalar(csr, scalar) = csr
 *        - otherwise, ``_scatter_minus_scalar`` behaves exactly like _minus_scalar and
 *        with default storage
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _scatter_minus_scalar(const std::string& symbol_name,
                                    Symbol data,
                                    mx_float scalar) {
  return Operator("_scatter_minus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise sum of the input arrays with broadcasting.
 *
 *        `broadcast_plus` is an alias to the function `broadcast_add`.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_add(x, y) = [[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]
 *
 *        broadcast_plus(x, y) = [[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_add(csr, dense(1D)) = dense
 *        broadcast_add(dense(1D), csr) = dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_add(const std::string& symbol_name,
                            Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise difference of the input arrays with broadcasting.
 *
 *        `broadcast_minus` is an alias to the function `broadcast_sub`.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_sub(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *        broadcast_minus(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_sub/minus(csr, dense(1D)) = dense
 *        broadcast_sub/minus(dense(1D), csr) = dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_sub(const std::string& symbol_name,
                            Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_sub")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise product of the input arrays with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_mul(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_mul(csr, dense(1D)) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L146
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_mul(const std::string& symbol_name,
                            Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_mul")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise division of the input arrays with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 6.,  6.,  6.],
 *        [ 6.,  6.,  6.]]
 *
 *        y = [[ 2.],
 *        [ 3.]]
 *
 *        broadcast_div(x, y) = [[ 3.,  3.,  3.],
 *        [ 2.,  2.,  2.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_div(csr, dense(1D)) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L187
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_div(const std::string& symbol_name,
                            Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_div")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise modulo of the input arrays with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 8.,  8.,  8.],
 *        [ 8.,  8.,  8.]]
 *
 *        y = [[ 2.],
 *        [ 3.]]
 *
 *        broadcast_mod(x, y) = [[ 0.,  0.,  0.],
 *        [ 2.,  2.,  2.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L222
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_mod(const std::string& symbol_name,
                            Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_mod")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes rectified linear activation.
 *
 *        .. math::
 *        max(features, 0)
 *
 *        The storage type of ``relu`` output depends upon the input storage type:
 *
 *        - relu(default) = default
 *        - relu(row_sparse) = row_sparse
 *        - relu(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L85
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol relu(const std::string& symbol_name,
                   Symbol data) {
  return Operator("relu")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes sigmoid of x element-wise.
 *
 *        .. math::
 *        y = 1 / (1 + exp(-x))
 *
 *        The storage type of ``sigmoid`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L119
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sigmoid(const std::string& symbol_name,
                      Symbol data) {
  return Operator("sigmoid")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes hard sigmoid of x element-wise.
 *
 *        .. math::
 *        y = max(0, min(1, alpha * x + beta))
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L161
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \param alpha Slope of hard sigmoid
 * \param beta Bias of hard sigmoid.
 * \return new symbol
 */
inline Symbol hard_sigmoid(const std::string& symbol_name,
                           Symbol data,
                           mx_float alpha = 0.200000003,
                           mx_float beta = 0.5) {
  return Operator("hard_sigmoid")
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes softsign of x element-wise.
 *
 *        .. math::
 *        y = x / (1 + abs(x))
 *
 *        The storage type of ``softsign`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L191
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol softsign(const std::string& symbol_name,
                       Symbol data) {
  return Operator("softsign")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns a copy of the input.
 *
 *        From:src/operator/tensor/elemwise_unary_op_basic.cc:246
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol _copy(const std::string& symbol_name,
                    Symbol data) {
  return Operator("_copy")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Stops gradient computation.
 *
 *        Stops the accumulated gradient of the inputs from flowing through this operator
 *        in the backward direction. In other words, this operator prevents the
 *        of its inputs to be taken into account for computing gradients.
 *
 *        Example::
 *
 *        v1 = [1, 2]
 *        v2 = [0, 1]
 *        a = Variable('a')
 *        b = Variable('b')
 *        b_stop_grad = stop_gradient(3 * b)
 *        loss = MakeLoss(b_stop_grad + a)
 *
 *        executor = loss.simple_bind(ctx=cpu(), a=(1,2), b=(1,2))
 *        executor.forward(is_train=True, a=v1, b=v2)
 *        executor.outputs
 *        [ 1.  5.]
 *
 *        executor.backward()
 *        executor.grad_arrays
 *        [ 0.  0.]
 *        [ 1.  1.]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L327
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol BlockGrad(const std::string& symbol_name,
                        Symbol data) {
  return Operator("BlockGrad")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Make your own loss function in network construction.
 *
 *        This operator accepts a customized loss function symbol as a terminal loss and
 *        the symbol should be an operator with no backward dependency.
 *        The output of this function is the gradient of loss with respect to the input
 *
 *        For example, if you are a making a cross entropy loss function. Assume ``out``
 *        predicted output and ``label`` is the true label, then the cross entropy can be
 *
 *        cross_entropy = label * log(out) + (1 - label) * log(1 - out)
 *        loss = make_loss(cross_entropy)
 *
 *        We will need to use ``make_loss`` when we are creating our own loss function or
 *        combine multiple loss functions. Also we may want to stop some variables'
 *        from backpropagation. See more detail in ``BlockGrad`` or ``stop_gradient``.
 *
 *        The storage type of ``make_loss`` output depends upon the input storage type:
 *
 *        - make_loss(default) = default
 *        - make_loss(row_sparse) = row_sparse
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L360
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol make_loss(const std::string& symbol_name,
                        Symbol data) {
  return Operator("make_loss")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs First input.
 * \param rhs Second input.
 * \return new symbol
 */
inline Symbol _identity_with_attr_like_rhs(const std::string& symbol_name,
                                           Symbol lhs,
                                           Symbol rhs) {
  return Operator("_identity_with_attr_like_rhs")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Reshape some or all dimensions of `lhs` to have the same shape as some or all
 *
 *        Returns a **view** of the `lhs` array with a new shape without altering any
 *
 *        Example::
 *
 *        x = [1, 2, 3, 4, 5, 6]
 *        y = [[0, -4], [3, 2], [2, 2]]
 *        reshape_like(x, y) = [[1, 2], [3, 4], [5, 6]]
 *
 *        More precise control over how dimensions are inherited is achieved by
 *        slices over the `lhs` and `rhs` array dimensions. Only the sliced `lhs`
 *        are reshaped to the `rhs` sliced dimensions, with the non-sliced `lhs`
 *
 *        Examples::
 *
 *        - lhs shape = (30,7), rhs shape = (15,2,4), lhs_begin=0, lhs_end=1,
 *        - lhs shape = (3, 5), rhs shape = (1,15,4), lhs_begin=0, lhs_end=2,
 *
 *        Negative indices are supported, and `None` can be used for either `lhs_end` or
 *
 *        Example::
 *
 *        - lhs shape = (30, 12), rhs shape = (4, 2, 2, 3), lhs_begin=-1, lhs_end=None,
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L513
 * \param symbol_name name of the resulting symbol
 * \param lhs First input.
 * \param rhs Second input.
 * \return new symbol
 */
inline Symbol reshape_like(const std::string& symbol_name,
                           Symbol lhs,
                           Symbol rhs) {
  return Operator("reshape_like")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns a 1D int64 array containing the shape of data.
 *
 *        Example::
 *
 *        shape_array([[1,2,3,4], [5,6,7,8]]) = [2,4]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L572
 * \param symbol_name name of the resulting symbol
 * \param data Input Array.
 * \param lhs_begin Defaults to 0. The beginning index along which the lhs dimensions are
 * \param lhs_end Defaults to None. The ending index along which the lhs dimensions are
 * \param rhs_begin Defaults to 0. The beginning index along which the rhs dimensions are
 * \param rhs_end Defaults to None. The ending index along which the rhs dimensions are
 * \return new symbol
 */
inline Symbol shape_array(const std::string& symbol_name,
                          Symbol data,
                          dmlc::optional<int> lhs_begin = dmlc::optional<int>(),
                          dmlc::optional<int> lhs_end = dmlc::optional<int>(),
                          dmlc::optional<int> rhs_begin = dmlc::optional<int>(),
                          dmlc::optional<int> rhs_end = dmlc::optional<int>()) {
  return Operator("shape_array")
           .SetParam("lhs_begin", lhs_begin)
           .SetParam("lhs_end", lhs_end)
           .SetParam("rhs_begin", rhs_begin)
           .SetParam("rhs_end", rhs_end)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns a 1D int64 array containing the size of data.
 *
 *        Example::
 *
 *        size_array([[1,2,3,4], [5,6,7,8]]) = [8]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L624
 * \param symbol_name name of the resulting symbol
 * \param data Input Array.
 * \return new symbol
 */
inline Symbol size_array(const std::string& symbol_name,
                         Symbol data) {
  return Operator("size_array")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Output data type.
 */
enum class CastDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Casts all elements of the input to a new type.
 *
 *        .. note:: ``Cast`` is deprecated. Use ``cast`` instead.
 *
 *        Example::
 *
 *        cast([0.9, 1.3], dtype='int32') = [0, 1]
 *        cast([1e20, 11.1], dtype='float16') = [inf, 11.09375]
 *        cast([300, 11.1, 10.9, -1, -3], dtype='uint8') = [44, 11, 10, 255, 253]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L662
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param dtype Output data type.
 * \return new symbol
 */
inline Symbol Cast(const std::string& symbol_name,
                   Symbol data,
                   CastDtype dtype) {
  static const char *CastDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("Cast")
           .SetParam("dtype", CastDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Numerical negative of the argument, element-wise.
 *
 *        The storage type of ``negative`` output depends upon the input storage type:
 *
 *        - negative(default) = default
 *        - negative(row_sparse) = row_sparse
 *        - negative(csr) = csr
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol negative(const std::string& symbol_name,
                       Symbol data) {
  return Operator("negative")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the reciprocal of the argument, element-wise.
 *
 *        Calculates 1/x.
 *
 *        Example::
 *
 *        reciprocal([-2, 1, 3, 1.6, 0.2]) = [-0.5, 1.0, 0.33333334, 0.625, 5.0]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L714
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol reciprocal(const std::string& symbol_name,
                         Symbol data) {
  return Operator("reciprocal")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise absolute value of the input.
 *
 *        Example::
 *
 *        abs([-2, 0, 3]) = [2, 0, 3]
 *
 *        The storage type of ``abs`` output depends upon the input storage type:
 *
 *        - abs(default) = default
 *        - abs(row_sparse) = row_sparse
 *        - abs(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L767
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol abs(const std::string& symbol_name,
                  Symbol data) {
  return Operator("abs")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise sign of the input.
 *
 *        Example::
 *
 *        sign([-2, 0, 3]) = [-1, 0, 1]
 *
 *        The storage type of ``sign`` output depends upon the input storage type:
 *
 *        - sign(default) = default
 *        - sign(row_sparse) = row_sparse
 *        - sign(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L805
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sign(const std::string& symbol_name,
                   Symbol data) {
  return Operator("sign")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise rounded value to the nearest integer of the input.
 *
 *        Example::
 *
 *        round([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  2., -2.,  2.,  2.]
 *
 *        The storage type of ``round`` output depends upon the input storage type:
 *
 *        - round(default) = default
 *        - round(row_sparse) = row_sparse
 *        - round(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L824
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol round(const std::string& symbol_name,
                    Symbol data) {
  return Operator("round")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise rounded value to the nearest integer of the input.
 *
 *        .. note::
 *        - For input ``n.5`` ``rint`` returns ``n`` while ``round`` returns ``n+1``.
 *        - For input ``-n.5`` both ``rint`` and ``round`` returns ``-n-1``.
 *
 *        Example::
 *
 *        rint([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  1., -2.,  2.,  2.]
 *
 *        The storage type of ``rint`` output depends upon the input storage type:
 *
 *        - rint(default) = default
 *        - rint(row_sparse) = row_sparse
 *        - rint(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L845
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol rint(const std::string& symbol_name,
                   Symbol data) {
  return Operator("rint")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise ceiling of the input.
 *
 *        The ceil of the scalar x is the smallest integer i, such that i >= x.
 *
 *        Example::
 *
 *        ceil([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  2.,  2.,  3.]
 *
 *        The storage type of ``ceil`` output depends upon the input storage type:
 *
 *        - ceil(default) = default
 *        - ceil(row_sparse) = row_sparse
 *        - ceil(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L864
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol ceil(const std::string& symbol_name,
                   Symbol data) {
  return Operator("ceil")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise floor of the input.
 *
 *        The floor of the scalar x is the largest integer i, such that i <= x.
 *
 *        Example::
 *
 *        floor([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-3., -2.,  1.,  1.,  2.]
 *
 *        The storage type of ``floor`` output depends upon the input storage type:
 *
 *        - floor(default) = default
 *        - floor(row_sparse) = row_sparse
 *        - floor(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L883
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol floor(const std::string& symbol_name,
                    Symbol data) {
  return Operator("floor")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Return the element-wise truncated value of the input.
 *
 *        The truncated value of the scalar x is the nearest integer i which is closer to
 *        zero than x is. In short, the fractional part of the signed number x is
 *
 *        Example::
 *
 *        trunc([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  1.,  1.,  2.]
 *
 *        The storage type of ``trunc`` output depends upon the input storage type:
 *
 *        - trunc(default) = default
 *        - trunc(row_sparse) = row_sparse
 *        - trunc(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L903
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol trunc(const std::string& symbol_name,
                    Symbol data) {
  return Operator("trunc")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise rounded value to the nearest \
 *        integer towards zero of the input.
 *
 *        Example::
 *
 *        fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1.,  1., 2.]
 *
 *        The storage type of ``fix`` output depends upon the input storage type:
 *
 *        - fix(default) = default
 *        - fix(row_sparse) = row_sparse
 *        - fix(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L921
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol fix(const std::string& symbol_name,
                  Symbol data) {
  return Operator("fix")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise squared value of the input.
 *
 *        .. math::
 *        square(x) = x^2
 *
 *        Example::
 *
 *        square([2, 3, 4]) = [4, 9, 16]
 *
 *        The storage type of ``square`` output depends upon the input storage type:
 *
 *        - square(default) = default
 *        - square(row_sparse) = row_sparse
 *        - square(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L961
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol square(const std::string& symbol_name,
                     Symbol data) {
  return Operator("square")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise square-root value of the input.
 *
 *        .. math::
 *        \textrm{sqrt}(x) = \sqrt{x}
 *
 *        Example::
 *
 *        sqrt([4, 9, 16]) = [2, 3, 4]
 *
 *        The storage type of ``sqrt`` output depends upon the input storage type:
 *
 *        - sqrt(default) = default
 *        - sqrt(row_sparse) = row_sparse
 *        - sqrt(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L985
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sqrt(const std::string& symbol_name,
                   Symbol data) {
  return Operator("sqrt")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise inverse square-root value of the input.
 *
 *        .. math::
 *        rsqrt(x) = 1/\sqrt{x}
 *
 *        Example::
 *
 *        rsqrt([4,9,16]) = [0.5, 0.33333334, 0.25]
 *
 *        The storage type of ``rsqrt`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1005
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol rsqrt(const std::string& symbol_name,
                    Symbol data) {
  return Operator("rsqrt")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise cube-root value of the input.
 *
 *        .. math::
 *        cbrt(x) = \sqrt[3]{x}
 *
 *        Example::
 *
 *        cbrt([1, 8, -125]) = [1, 2, -5]
 *
 *        The storage type of ``cbrt`` output depends upon the input storage type:
 *
 *        - cbrt(default) = default
 *        - cbrt(row_sparse) = row_sparse
 *        - cbrt(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1028
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol cbrt(const std::string& symbol_name,
                   Symbol data) {
  return Operator("cbrt")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise gauss error function of the input.
 *
 *        Example::
 *
 *        erf([0, -1., 10.]) = [0., -0.8427, 1.]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1042
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol erf(const std::string& symbol_name,
                  Symbol data) {
  return Operator("erf")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise inverse gauss error function of the input.
 *
 *        Example::
 *
 *        erfinv([0, 0.5., -1.]) = [0., 0.4769, -inf]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1063
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol erfinv(const std::string& symbol_name,
                     Symbol data) {
  return Operator("erfinv")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise inverse cube-root value of the input.
 *
 *        .. math::
 *        rcbrt(x) = 1/\sqrt[3]{x}
 *
 *        Example::
 *
 *        rcbrt([1,8,-125]) = [1.0, 0.5, -0.2]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1082
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol rcbrt(const std::string& symbol_name,
                    Symbol data) {
  return Operator("rcbrt")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise exponential value of the input.
 *
 *        .. math::
 *        exp(x) = e^x \approx 2.718^x
 *
 *        Example::
 *
 *        exp([0, 1, 2]) = [1., 2.71828175, 7.38905621]
 *
 *        The storage type of ``exp`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1122
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol exp(const std::string& symbol_name,
                  Symbol data) {
  return Operator("exp")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise Natural logarithmic value of the input.
 *
 *        The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x``
 *
 *        The storage type of ``log`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1135
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log(const std::string& symbol_name,
                  Symbol data) {
  return Operator("log")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise Base-10 logarithmic value of the input.
 *
 *        ``10**log10(x) = x``
 *
 *        The storage type of ``log10`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1152
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log10(const std::string& symbol_name,
                    Symbol data) {
  return Operator("log10")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise Base-2 logarithmic value of the input.
 *
 *        ``2**log2(x) = x``
 *
 *        The storage type of ``log2`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1164
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log2(const std::string& symbol_name,
                   Symbol data) {
  return Operator("log2")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise ``log(1 + x)`` value of the input.
 *
 *        This function is more accurate than ``log(1 + x)``  for small ``x`` so that
 *        :math:`1+x\approx 1`
 *
 *        The storage type of ``log1p`` output depends upon the input storage type:
 *
 *        - log1p(default) = default
 *        - log1p(row_sparse) = row_sparse
 *        - log1p(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1265
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log1p(const std::string& symbol_name,
                    Symbol data) {
  return Operator("log1p")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns ``exp(x) - 1`` computed element-wise on the input.
 *
 *        This function provides greater precision than ``exp(x) - 1`` for small values
 *
 *        The storage type of ``expm1`` output depends upon the input storage type:
 *
 *        - expm1(default) = default
 *        - expm1(row_sparse) = row_sparse
 *        - expm1(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1283
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol expm1(const std::string& symbol_name,
                    Symbol data) {
  return Operator("expm1")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the gamma function (extension of the factorial function \
 *        to the reals), computed element-wise on the input array.
 *
 *        The storage type of ``gamma`` output is always dense
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol gamma(const std::string& symbol_name,
                    Symbol data) {
  return Operator("gamma")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise log of the absolute value of the gamma function \
 *        of the input.
 *
 *        The storage type of ``gammaln`` output is always dense
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol gammaln(const std::string& symbol_name,
                      Symbol data) {
  return Operator("gammaln")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of logical NOT (!) function
 *
 *        Example:
 *        logical_not([-2., 0., 1.]) = [0., 1., 0.]
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol logical_not(const std::string& symbol_name,
                          Symbol data) {
  return Operator("logical_not")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Pick rows specified by user input index array from a row sparse matrix
 *        and save them in the output sparse matrix.
 *
 *        Example::
 *
 *        data = [[1, 2], [3, 4], [5, 6]]
 *        indices = [0, 1, 3]
 *        shape = (4, 2)
 *        rsp_in = row_sparse_array(data, indices)
 *        to_retain = [0, 3]
 *        rsp_out = retain(rsp_in, to_retain)
 *        rsp_out.data = [[1, 2], [5, 6]]
 *        rsp_out.indices = [0, 3]
 *
 *        The storage type of ``retain`` output depends on storage types of inputs
 *
 *        - retain(row_sparse, default) = row_sparse
 *        - otherwise, ``retain`` is not supported
 *
 *
 *
 *        Defined in src/operator/tensor/sparse_retain.cc:L53
 * \param symbol_name name of the resulting symbol
 * \param data The input array for sparse_retain operator.
 * \param indices The index array of rows ids that will be retained.
 * \return new symbol
 */
inline Symbol _sparse_retain(const std::string& symbol_name,
                             Symbol data,
                             Symbol indices) {
  return Operator("_sparse_retain")
           .SetInput("data", data)
           .SetInput("indices", indices)
           .CreateSymbol(symbol_name);
}

/*! \brief Output data type.
 */
enum class Amp_castDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Cast function between low precision float/FP32 used by AMP.
 *
 *        It casts only between low precision float/FP32 and does not do anything for
 *
 *
 *        Defined in src/operator/tensor/amp_cast.cc:L37
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param dtype Output data type.
 * \return new symbol
 */
inline Symbol amp_cast(const std::string& symbol_name,
                       Symbol data,
                       Amp_castDtype dtype) {
  static const char *Amp_castDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("amp_cast")
           .SetParam("dtype", Amp_castDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Cast function used by AMP, that casts its inputs to the common widest type.
 *
 *        It casts only between low precision float/FP32 and does not do anything for
 *
 *
 *
 *        Defined in src/operator/tensor/amp_cast.cc:L71
 * \param symbol_name name of the resulting symbol
 * \param data Weights
 * \param num_outputs Number of input/output pairs to be casted to the widest type.
 * \return new symbol
 */
inline Symbol amp_multicast(const std::string& symbol_name,
                            const std::vector<Symbol>& data,
                            int num_outputs) {
  return Operator("amp_multicast")
           .SetParam("num_outputs", num_outputs)
(data)
           .CreateSymbol(symbol_name);
}

/*! \brief The return type.
 *        "value" means to return the top k values, "indices" means to return the indices
 *        of the top k values, "mask" means to return a mask array containing 0 and 1. 1
 *        means the top k values. "both" means to return a list of both values and
 */
enum class TopkRetTyp {
  kBoth = 0,
  kIndices = 1,
  kMask = 2,
  kValue = 3
};

/*! \brief DType of the output indices when ret_typ is "indices" or "both". An error will
 */
enum class TopkDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kUint8 = 5
};

/*!
 * \brief Returns the top *k* elements in an input array along the given axis.
 *        The returned elements will be sorted.
 *
 *        Examples::
 *
 *        x = [[ 0.3,  0.2,  0.4],
 *        [ 0.1,  0.3,  0.2]]
 *
 *        // returns an index of the largest element on last axis
 *        topk(x) = [[ 2.],
 *        [ 1.]]
 *
 *        // returns the value of top-2 largest elements on last axis
 *        topk(x, ret_typ='value', k=2) = [[ 0.4,  0.3],
 *        [ 0.3,  0.2]]
 *
 *        // returns the value of top-2 smallest elements on last axis
 *        topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 ,  0.3],
 *        [ 0.1 ,  0.2]]
 *
 *        // returns the value of top-2 largest elements on axis 0
 *        topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3,  0.3,  0.4],
 *        [ 0.1,  0.2,  0.2]]
 *
 *        // flattens and then returns list of both values and indices
 *        topk(x, ret_typ='both', k=2) = [[[ 0.4,  0.3], [ 0.3,  0.2]] ,  [[ 2.,  0.], [
 *
 *
 *
 *        Defined in src/operator/tensor/ordering_op.cc:L64
 * \param symbol_name name of the resulting symbol
 * \param data The input array
 * \param axis Axis along which to choose the top k indices. If not given, the flattened
 * \param k Number of top elements to select, should be always smaller than or equal to
 * \param ret_typ The return type.
 *        "value" means to return the top k values, "indices" means to return the indices
 *        of the top k values, "mask" means to return a mask array containing 0 and 1. 1
 *        means the top k values. "both" means to return a list of both values and
 * \param is_ascend Whether to choose k largest or k smallest elements. Top K largest
 * \param dtype DType of the output indices when ret_typ is "indices" or "both". An error
 * \return new symbol
 */
inline Symbol topk(const std::string& symbol_name,
                   Symbol data,
                   dmlc::optional<int> axis = dmlc::optional<int>(-1),
                   int k = 1,
                   TopkRetTyp ret_typ = TopkRetTyp::kIndices,
                   bool is_ascend = false,
                   TopkDtype dtype = TopkDtype::kFloat32) {
  static const char *TopkRetTypValues[] = {
    "both",
    "indices",
    "mask",
    "value"
  };
  static const char *TopkDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "uint8"
  };
  return Operator("topk")
           .SetParam("axis", axis)
           .SetParam("k", k)
           .SetParam("ret_typ", TopkRetTypValues[int(ret_typ)])
           .SetParam("is_ascend", is_ascend)
           .SetParam("dtype", TopkDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns a sorted copy of an input array along the given axis.
 *
 *        Examples::
 *
 *        x = [[ 1, 4],
 *        [ 3, 1]]
 *
 *        // sorts along the last axis
 *        sort(x) = [[ 1.,  4.],
 *        [ 1.,  3.]]
 *
 *        // flattens and then sorts
 *        sort(x, axis=None) = [ 1.,  1.,  3.,  4.]
 *
 *        // sorts along the first axis
 *        sort(x, axis=0) = [[ 1.,  1.],
 *        [ 3.,  4.]]
 *
 *        // in a descend order
 *        sort(x, is_ascend=0) = [[ 4.,  1.],
 *        [ 3.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/ordering_op.cc:L127
 * \param symbol_name name of the resulting symbol
 * \param data The input array
 * \param axis Axis along which to choose sort the input tensor. If not given, the
 * \param is_ascend Whether to sort in ascending or descending order.
 * \return new symbol
 */
inline Symbol sort(const std::string& symbol_name,
                   Symbol data,
                   dmlc::optional<int> axis = dmlc::optional<int>(-1),
                   bool is_ascend = true) {
  return Operator("sort")
           .SetParam("axis", axis)
           .SetParam("is_ascend", is_ascend)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output indices. It is only valid when ret_typ is "indices" or
 *        "both". An error will be raised if the selected data type cannot precisely
 */
enum class ArgsortDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kUint8 = 5
};

/*!
 * \brief Returns the indices that would sort an input array along the given axis.
 *
 *        This function performs sorting along the given axis and returns an array of
 *        as an input array that index data in sorted order.
 *
 *        Examples::
 *
 *        x = [[ 0.3,  0.2,  0.4],
 *        [ 0.1,  0.3,  0.2]]
 *
 *        // sort along axis -1
 *        argsort(x) = [[ 1.,  0.,  2.],
 *        [ 0.,  2.,  1.]]
 *
 *        // sort along axis 0
 *        argsort(x, axis=0) = [[ 1.,  0.,  1.]
 *        [ 0.,  1.,  0.]]
 *
 *        // flatten and then sort
 *        argsort(x, axis=None) = [ 3.,  1.,  5.,  0.,  4.,  2.]
 *
 *
 *        Defined in src/operator/tensor/ordering_op.cc:L177
 * \param symbol_name name of the resulting symbol
 * \param data The input array
 * \param axis Axis along which to sort the input tensor. If not given, the flattened
 * \param is_ascend Whether to sort in ascending or descending order.
 * \param dtype DType of the output indices. It is only valid when ret_typ is "indices"
 *        or "both". An error will be raised if the selected data type cannot precisely
 * \return new symbol
 */
inline Symbol argsort(const std::string& symbol_name,
                      Symbol data,
                      dmlc::optional<int> axis = dmlc::optional<int>(-1),
                      bool is_ascend = true,
                      ArgsortDtype dtype = ArgsortDtype::kFloat32) {
  static const char *ArgsortDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "uint8"
  };
  return Operator("argsort")
           .SetParam("axis", axis)
           .SetParam("is_ascend", is_ascend)
           .SetParam("dtype", ArgsortDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _plus_scalar(const std::string& symbol_name,
                           Symbol data,
                           mx_float scalar) {
  return Operator("_plus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _minus_scalar(const std::string& symbol_name,
                            Symbol data,
                            mx_float scalar) {
  return Operator("_minus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rminus_scalar(const std::string& symbol_name,
                             Symbol data,
                             mx_float scalar) {
  return Operator("_rminus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Multiply an array with a scalar.
 *
 *        ``_mul_scalar`` only operates on data array of input if input is sparse.
 *
 *        For example, if input of shape (100, 100) has only 2 non zero elements,
 *        i.e. input.data = [5, 6], scalar = nan,
 *        it will result output.data = [nan, nan] instead of 10000 nans.
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_scalar_op_basic.cc:L149
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _mul_scalar(const std::string& symbol_name,
                          Symbol data,
                          mx_float scalar) {
  return Operator("_mul_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Divide an array with a scalar.
 *
 *        ``_div_scalar`` only operates on data array of input if input is sparse.
 *
 *        For example, if input of shape (100, 100) has only 2 non zero elements,
 *        i.e. input.data = [5, 6], scalar = nan,
 *        it will result output.data = [nan, nan] instead of 10000 nans.
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_scalar_op_basic.cc:L171
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _div_scalar(const std::string& symbol_name,
                          Symbol data,
                          mx_float scalar) {
  return Operator("_div_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rdiv_scalar(const std::string& symbol_name,
                           Symbol data,
                           mx_float scalar) {
  return Operator("_rdiv_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _mod_scalar(const std::string& symbol_name,
                          Symbol data,
                          mx_float scalar) {
  return Operator("_mod_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rmod_scalar(const std::string& symbol_name,
                           Symbol data,
                           mx_float scalar) {
  return Operator("_rmod_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Data type of weight.
 */
enum class EmbeddingDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Maps integer indices to vector representations (embeddings).
 *
 *        This operator maps words to real-valued vectors in a high-dimensional space,
 *        called word embeddings. These embeddings can capture semantic and syntactic
 *        For example, it has been noted that in the learned embedding spaces, similar
 *        to be close to each other and dissimilar words far apart.
 *
 *        For an input array of shape (d1, ..., dK),
 *        the shape of an output array is (d1, ..., dK, output_dim).
 *        All the input values should be integers in the range [0, input_dim).
 *
 *        If the input_dim is ip0 and output_dim is op0, then shape of the embedding
 *        (ip0, op0).
 *
 *        By default, if any index mentioned is too large, it is replaced by the index
 *        the last vector in an embedding matrix.
 *
 *        Examples::
 *
 *        input_dim = 4
 *        output_dim = 5
 *
 *        // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
 *        y = [[  0.,   1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.,   9.],
 *        [ 10.,  11.,  12.,  13.,  14.],
 *        [ 15.,  16.,  17.,  18.,  19.]]
 *
 *        // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
 *        x = [[ 1.,  3.],
 *        [ 0.,  2.]]
 *
 *        // Mapped input x to its vector representation y.
 *        Embedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
 *        [ 15.,  16.,  17.,  18.,  19.]],
 *
 *        [[  0.,   1.,   2.,   3.,   4.],
 *        [ 10.,  11.,  12.,  13.,  14.]]]
 *
 *
 *        The storage type of weight can be either row_sparse or default.
 *
 *        .. Note::
 *
 *        If "sparse_grad" is set to True, the storage type of gradient w.r.t weights
 *        "row_sparse". Only a subset of optimizers support sparse gradients, including
 *        and Adam. Note that by default lazy updates is turned on, which may perform
 *        from standard updates. For more details, please check the Optimization API at:
 *        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L519
 * \param symbol_name name of the resulting symbol
 * \param data The input array to the embedding operator.
 * \param weight The embedding weight matrix.
 * \param input_dim Vocabulary size of the input indices.
 * \param output_dim Dimension of the embedding vectors.
 * \param dtype Data type of weight.
 * \param sparse_grad Compute row sparse gradient in the backward calculation. If set to
 * \return new symbol
 */
inline Symbol Embedding(const std::string& symbol_name,
                        Symbol data,
                        Symbol weight,
                        int input_dim,
                        int output_dim,
                        EmbeddingDtype dtype = EmbeddingDtype::kFloat32,
                        bool sparse_grad = false) {
  static const char *EmbeddingDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("Embedding")
           .SetParam("input_dim", input_dim)
           .SetParam("output_dim", output_dim)
           .SetParam("dtype", EmbeddingDtypeValues[int(dtype)])
           .SetParam("sparse_grad", sparse_grad)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .CreateSymbol(symbol_name);
}

/*! \brief Data type of weight.
 */
enum class _contrib_SparseEmbeddingDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Maps integer indices to vector representations (embeddings).
 *
 *        note:: ``contrib.SparseEmbedding`` is deprecated, use ``Embedding`` instead.
 *
 *        This operator maps words to real-valued vectors in a high-dimensional space,
 *        called word embeddings. These embeddings can capture semantic and syntactic
 *        For example, it has been noted that in the learned embedding spaces, similar
 *        to be close to each other and dissimilar words far apart.
 *
 *        For an input array of shape (d1, ..., dK),
 *        the shape of an output array is (d1, ..., dK, output_dim).
 *        All the input values should be integers in the range [0, input_dim).
 *
 *        If the input_dim is ip0 and output_dim is op0, then shape of the embedding
 *        (ip0, op0).
 *
 *        The storage type of the gradient will be `row_sparse`.
 *
 *        .. Note::
 *
 *        `SparseEmbedding` is designed for the use case where `input_dim` is very large
 *        The operator is available on both CPU and GPU.
 *        When `deterministic` is set to `True`, the accumulation of gradients follows a
 *        deterministic order if a feature appears multiple times in the input. However,
 *        accumulation is usually slower when the order is enforced on GPU.
 *        When the operator is used on the GPU, the recommended value for `deterministic`
 *
 *        Examples::
 *
 *        input_dim = 4
 *        output_dim = 5
 *
 *        // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
 *        y = [[  0.,   1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.,   9.],
 *        [ 10.,  11.,  12.,  13.,  14.],
 *        [ 15.,  16.,  17.,  18.,  19.]]
 *
 *        // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
 *        x = [[ 1.,  3.],
 *        [ 0.,  2.]]
 *
 *        // Mapped input x to its vector representation y.
 *        SparseEmbedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
 *        [ 15.,  16.,  17.,  18.,  19.]],
 *
 *        [[  0.,   1.,   2.,   3.,   4.],
 *        [ 10.,  11.,  12.,  13.,  14.]]]
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L595
 * \param symbol_name name of the resulting symbol
 * \param data The input array to the embedding operator.
 * \param weight The embedding weight matrix.
 * \param input_dim Vocabulary size of the input indices.
 * \param output_dim Dimension of the embedding vectors.
 * \param dtype Data type of weight.
 * \param sparse_grad Compute row sparse gradient in the backward calculation. If set to
 * \return new symbol
 */
inline Symbol _contrib_SparseEmbedding(const std::string& symbol_name,
                                       Symbol data,
                                       Symbol weight,
                                       int input_dim,
                                       int output_dim,
                                       _contrib_SparseEmbeddingDtype dtype = _contrib_SparseEmbeddingDtype::kFloat32,
                                       bool sparse_grad = false) {
  static const char *_contrib_SparseEmbeddingDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_contrib_SparseEmbedding")
           .SetParam("input_dim", input_dim)
           .SetParam("output_dim", output_dim)
           .SetParam("dtype", _contrib_SparseEmbeddingDtypeValues[int(dtype)])
           .SetParam("sparse_grad", sparse_grad)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .CreateSymbol(symbol_name);
}

/*! \brief Specify how out-of-bound indices bahave. Default is "clip". "clip" means clip
 *        to the range. So, if all indices mentioned are too large, they are replaced by
 *        the index that addresses the last element along an axis.  "wrap" means to wrap
 */
enum class TakeMode {
  kClip = 0,
  kRaise = 1,
  kWrap = 2
};

/*!
 * \brief Takes elements from an input array along the given axis.
 *
 *        This function slices the input array along a particular axis with the provided
 *
 *        Given data tensor of rank r >= 1, and indices tensor of rank q, gather entries
 *        dimension of data (by default outer-most one as axis=0) indexed by indices, and
 *        in an output tensor of rank q + (r - 1).
 *
 *        Examples::
 *
 *        x = [4.  5.  6.]
 *
 *        // Trivial case, take the second element along the first axis.
 *
 *        take(x, [1]) = [ 5. ]
 *
 *        // The other trivial case, axis=-1, take the third element along the first axis
 *
 *        take(x, [3], axis=-1, mode='clip') = [ 6. ]
 *
 *        x = [[ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 5.,  6.]]
 *
 *        // In this case we will get rows 0 and 1, then 1 and 2. Along axis 0
 *
 *        take(x, [[0,1],[1,2]]) = [[[ 1.,  2.],
 *        [ 3.,  4.]],
 *
 *        [[ 3.,  4.],
 *        [ 5.,  6.]]]
 *
 *        // In this case we will get rows 0 and 1, then 1 and 2 (calculated by wrapping
 *        // Along axis 1
 *
 *        take(x, [[0, 3], [-1, -2]], axis=1, mode='wrap') = [[[ 1.  2.]
 *        [ 2.  1.]]
 *
 *        [[ 3.  4.]
 *        [ 4.  3.]]
 *
 *        [[ 5.  6.]
 *        [ 6.  5.]]]
 *
 *        The storage type of ``take`` output depends upon the input storage type:
 *
 *        - take(default, default) = default
 *        - take(csr, default, axis=0) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L695
 * \param symbol_name name of the resulting symbol
 * \param a The input array.
 * \param indices The indices of the values to be extracted.
 * \param axis The axis of input array to be taken.For input tensor of rank r, it could
 * \param mode Specify how out-of-bound indices bahave. Default is "clip". "clip" means
 *        clip to the range. So, if all indices mentioned are too large, they are
 *        replaced by the index that addresses the last element along an axis.  "wrap"
 * \return new symbol
 */
inline Symbol take(const std::string& symbol_name,
                   Symbol a,
                   Symbol indices,
                   int axis = 0,
                   TakeMode mode = TakeMode::kClip) {
  static const char *TakeModeValues[] = {
    "clip",
    "raise",
    "wrap"
  };
  return Operator("take")
           .SetParam("axis", axis)
           .SetParam("mode", TakeModeValues[int(mode)])
           .SetInput("a", a)
           .SetInput("indices", indices)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Takes elements from a data batch.
 *
 *        .. note::
 *        `batch_take` is deprecated. Use `pick` instead.
 *
 *        Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the
 *        an output array of shape ``(i0,)`` with::
 *
 *        output[i] = input[i, indices[i]]
 *
 *        Examples::
 *
 *        x = [[ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 5.,  6.]]
 *
 *        // takes elements with specified indices
 *        batch_take(x, [0,1,0]) = [ 1.  4.  5.]
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L753
 * \param symbol_name name of the resulting symbol
 * \param a The input array
 * \param indices The index array
 * \return new symbol
 */
inline Symbol batch_take(const std::string& symbol_name,
                         Symbol a,
                         Symbol indices) {
  return Operator("batch_take")
           .SetInput("a", a)
           .SetInput("indices", indices)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output
 */
enum class One_hotDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kInt64 = 4,
  kInt8 = 5,
  kUint8 = 6
};

/*!
 * \brief Returns a one-hot array.
 *
 *        The locations represented by `indices` take value `on_value`, while all
 *        other locations take value `off_value`.
 *
 *        `one_hot` operation with `indices` of shape ``(i0, i1)`` and `depth`  of ``d``
 *        in an output array of shape ``(i0, i1, d)`` with::
 *
 *        output[i,j,:] = off_value
 *        output[i,j,indices[i,j]] = on_value
 *
 *        Examples::
 *
 *        one_hot([1,0,2,0], 3) = [[ 0.  1.  0.]
 *        [ 1.  0.  0.]
 *        [ 0.  0.  1.]
 *        [ 1.  0.  0.]]
 *
 *        one_hot([1,0,2,0], 3, on_value=8, off_value=1,
 *        dtype='int32') = [[1 8 1]
 *        [8 1 1]
 *        [1 1 8]
 *        [8 1 1]]
 *
 *        one_hot([[1,0],[1,0],[2,0]], 3) = [[[ 0.  1.  0.]
 *        [ 1.  0.  0.]]
 *
 *        [[ 0.  1.  0.]
 *        [ 1.  0.  0.]]
 *
 *        [[ 0.  0.  1.]
 *        [ 1.  0.  0.]]]
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L799
 * \param symbol_name name of the resulting symbol
 * \param indices array of locations where to set on_value
 * \param depth Depth of the one hot dimension.
 * \param on_value The value assigned to the locations represented by indices.
 * \param off_value The value assigned to the locations not represented by indices.
 * \param dtype DType of the output
 * \return new symbol
 */
inline Symbol one_hot(const std::string& symbol_name,
                      Symbol indices,
                      int depth,
                      double on_value = 1,
                      double off_value = 0,
                      One_hotDtype dtype = One_hotDtype::kFloat32) {
  static const char *One_hotDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("one_hot")
           .SetParam("depth", depth)
           .SetParam("on_value", on_value)
           .SetParam("off_value", off_value)
           .SetParam("dtype", One_hotDtypeValues[int(dtype)])
           .SetInput("indices", indices)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Gather elements or slices from `data` and store to a tensor whose
 *        shape is defined by `indices`.
 *
 *        Given `data` with shape `(X_0, X_1, ..., X_{N-1})` and indices with shape
 *        `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(Y_0, ..., Y_{K-1}, X_M,
 *        where `M <= N`. If `M == N`, output shape will simply be `(Y_0, ..., Y_{K-1})`.
 *
 *        The elements in output is defined as follows::
 *
 *        output[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}] = data[indices[0, y_0, ...,
 *        ...,
 *        indices[M-1, y_0, ..., y_{K-1}],
 *        x_M, ..., x_{N-1}]
 *
 *        Examples::
 *
 *        data = [[0, 1], [2, 3]]
 *        indices = [[1, 1, 0], [0, 1, 0]]
 *        gather_nd(data, indices) = [2, 3, 0]
 *
 *        data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 *        indices = [[0, 1], [1, 0]]
 *        gather_nd(data, indices) = [[3, 4], [5, 6]]
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data data
 * \param indices indices
 * \return new symbol
 */
inline Symbol gather_nd(const std::string& symbol_name,
                        Symbol data,
                        Symbol indices) {
  return Operator("gather_nd")
           .SetInput("data", data)
           .SetInput("indices", indices)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Scatters data into a new tensor according to indices.
 *
 *        Given `data` with shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})` and indices
 *        `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(X_0, X_1, ..., X_{N-1})`,
 *        where `M <= N`. If `M == N`, data shape should simply be `(Y_0, ..., Y_{K-1})`.
 *
 *        The elements in output is defined as follows::
 *
 *        output[indices[0, y_0, ..., y_{K-1}],
 *        ...,
 *        indices[M-1, y_0, ..., y_{K-1}],
 *        x_M, ..., x_{N-1}] = data[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}]
 *
 *        all other entries in output are 0.
 *
 *        .. warning::
 *
 *        If the indices have duplicates, the result will be non-deterministic and
 *        the gradient of `scatter_nd` will not be correct!!
 *
 *
 *        Examples::
 *
 *        data = [2, 3, 0]
 *        indices = [[1, 1, 0], [0, 1, 0]]
 *        shape = (2, 2)
 *        scatter_nd(data, indices, shape) = [[0, 0], [2, 3]]
 *
 *        data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 *        indices = [[0, 1], [1, 1]]
 *        shape = (2, 2, 2, 2)
 *        scatter_nd(data, indices, shape) = [[[[0, 0],
 *        [0, 0]],
 *
 *        [[1, 2],
 *        [3, 4]]],
 *
 *        [[[0, 0],
 *        [0, 0]],
 *
 *        [[5, 6],
 *        [7, 8]]]]
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data data
 * \param indices indices
 * \param shape Shape of output.
 * \return new symbol
 */
inline Symbol scatter_nd(const std::string& symbol_name,
                         Symbol data,
                         Symbol indices,
                         Shape shape) {
  return Operator("scatter_nd")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .SetInput("indices", indices)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator has the same functionality as scatter_nd
 *        except that it does not reset the elements not indexed by the input
 *        index `NDArray` in the input data `NDArray`. output should be explicitly
 *        given and be the same as lhs.
 *
 *        .. note:: This operator is for internal use only.
 *
 *        Examples::
 *
 *        data = [2, 3, 0]
 *        indices = [[1, 1, 0], [0, 1, 0]]
 *        out = [[1, 1], [1, 1]]
 *        _scatter_set_nd(lhs=out, rhs=data, indices=indices, out=out)
 *        out = [[0, 1], [2, 3]]
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param lhs source input
 * \param rhs value to assign
 * \param indices indices
 * \param shape Shape of output.
 * \return new symbol
 */
inline Symbol _scatter_set_nd(const std::string& symbol_name,
                              Symbol lhs,
                              Symbol rhs,
                              Symbol indices,
                              Shape shape) {
  return Operator("_scatter_set_nd")
           .SetParam("shape", shape)
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .SetInput("indices", indices)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **equal to** (==) comparison operation with
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_equal(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L46
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_equal(const std::string& symbol_name,
                              Symbol lhs,
                              Symbol rhs) {
  return Operator("broadcast_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **not equal to** (!=) comparison operation
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_not_equal(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L64
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_not_equal(const std::string& symbol_name,
                                  Symbol lhs,
                                  Symbol rhs) {
  return Operator("broadcast_not_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **greater than** (>) comparison operation
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_greater(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L82
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_greater(const std::string& symbol_name,
                                Symbol lhs,
                                Symbol rhs) {
  return Operator("broadcast_greater")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **greater than or equal to** (>=) comparison
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_greater_equal(x, y) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L100
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_greater_equal(const std::string& symbol_name,
                                      Symbol lhs,
                                      Symbol rhs) {
  return Operator("broadcast_greater_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **lesser than** (<) comparison operation
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_lesser(x, y) = [[ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L118
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_lesser(const std::string& symbol_name,
                               Symbol lhs,
                               Symbol rhs) {
  return Operator("broadcast_lesser")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **lesser than or equal to** (<=) comparison
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_lesser_equal(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L136
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_lesser_equal(const std::string& symbol_name,
                                     Symbol lhs,
                                     Symbol rhs) {
  return Operator("broadcast_lesser_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **logical and** with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_logical_and(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L154
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_logical_and(const std::string& symbol_name,
                                    Symbol lhs,
                                    Symbol rhs) {
  return Operator("broadcast_logical_and")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **logical or** with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  0.],
 *        [ 1.,  1.,  0.]]
 *
 *        y = [[ 1.],
 *        [ 0.]]
 *
 *        broadcast_logical_or(x, y) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L172
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_logical_or(const std::string& symbol_name,
                                   Symbol lhs,
                                   Symbol rhs) {
  return Operator("broadcast_logical_or")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the result of element-wise **logical xor** with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  0.],
 *        [ 1.,  1.,  0.]]
 *
 *        y = [[ 1.],
 *        [ 0.]]
 *
 *        broadcast_logical_xor(x, y) = [[ 0.,  0.,  1.],
 *        [ 1.,  1.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L190
 * \param symbol_name name of the resulting symbol
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_logical_xor(const std::string& symbol_name,
                                    Symbol lhs,
                                    Symbol rhs) {
  return Operator("broadcast_logical_xor")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Extracts a diagonal or constructs a diagonal array.
 *
 *        ``diag``'s behavior depends on the input array dimensions:
 *
 *        - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other
 *        - N-D arrays: extracts the diagonals of the sub-arrays with axes specified by
 *        The output shape would be decided by removing the axes numbered ``axis1`` and
 *        input shape and appending to the result a new axis with the size of the
 *
 *        For example, when the input shape is `(2, 3, 4, 5)`, ``axis1`` and ``axis2``
 *        respectively and ``k`` is 0, the resulting shape would be `(3, 5, 2)`.
 *
 *        Examples::
 *
 *        x = [[1, 2, 3],
 *        [4, 5, 6]]
 *
 *        diag(x) = [1, 5]
 *
 *        diag(x, k=1) = [2, 6]
 *
 *        diag(x, k=-1) = [4]
 *
 *        x = [1, 2, 3]
 *
 *        diag(x) = [[1, 0, 0],
 *        [0, 2, 0],
 *        [0, 0, 3]]
 *
 *        diag(x, k=1) = [[0, 1, 0],
 *        [0, 0, 2],
 *        [0, 0, 0]]
 *
 *        diag(x, k=-1) = [[0, 0, 0],
 *        [1, 0, 0],
 *        [0, 2, 0]]
 *
 *        x = [[[1, 2],
 *        [3, 4]],
 *
 *        [[5, 6],
 *        [7, 8]]]
 *
 *        diag(x) = [[1, 7],
 *        [2, 8]]
 *
 *        diag(x, k=1) = [[3],
 *        [4]]
 *
 *        diag(x, axis1=-2, axis2=-1) = [[1, 4],
 *        [5, 8]]
 *
 *
 *
 *        Defined in src/operator/tensor/diag_op.cc:L87
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param k Diagonal in question. The default is 0. Use k>0 for diagonals above the main
 *        diagonal, and k<0 for diagonals below the main diagonal. If input has shape (S0
 * \param axis1 The first axis of the sub-arrays of interest. Ignored when the input is a
 * \param axis2 The second axis of the sub-arrays of interest. Ignored when the input is
 * \return new symbol
 */
inline Symbol diag(const std::string& symbol_name,
                   Symbol data,
                   int k = 0,
                   int axis1 = 0,
                   int axis2 = 1) {
  return Operator("diag")
           .SetParam("k", k)
           .SetParam("axis1", axis1)
           .SetParam("axis2", axis2)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the sum of array elements over given axes.
 *
 *        .. Note::
 *
 *        `sum` and `sum_axis` are equivalent.
 *        For ndarray of csr storage type summation along axis 0 and axis 1 is supported.
 *        Setting keepdims or exclude to True will cause a fallback to dense operator.
 *
 *        Example::
 *
 *        data = [[[1, 2], [2, 3], [1, 3]],
 *        [[1, 4], [4, 3], [5, 2]],
 *        [[7, 1], [7, 2], [7, 3]]]
 *
 *        sum(data, axis=1)
 *        [[  4.   8.]
 *        [ 10.   9.]
 *        [ 21.   6.]]
 *
 *        sum(data, axis=[1,2])
 *        [ 12.  19.  27.]
 *
 *        data = [[1, 2, 0],
 *        [3, 0, 1],
 *        [4, 1, 0]]
 *
 *        csr = cast_storage(data, 'csr')
 *
 *        sum(csr, axis=0)
 *        [ 8.  3.  1.]
 *
 *        sum(csr, axis=1)
 *        [ 3.  4.  5.]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L116
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol sum(const std::string& symbol_name,
                  Symbol data,
                  dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                  bool keepdims = false,
                  bool exclude = false) {
  return Operator("sum")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the mean of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L132
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol mean(const std::string& symbol_name,
                   Symbol data,
                   dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                   bool keepdims = false,
                   bool exclude = false) {
  return Operator("mean")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the product of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L147
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol prod(const std::string& symbol_name,
                   Symbol data,
                   dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                   bool keepdims = false,
                   bool exclude = false) {
  return Operator("prod")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the sum of array elements over given axes treating Not a Numbers
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L162
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol nansum(const std::string& symbol_name,
                     Symbol data,
                     dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                     bool keepdims = false,
                     bool exclude = false) {
  return Operator("nansum")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the product of array elements over given axes treating Not a Numbers
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L177
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol nanprod(const std::string& symbol_name,
                      Symbol data,
                      dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                      bool keepdims = false,
                      bool exclude = false) {
  return Operator("nanprod")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the max of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L191
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol max(const std::string& symbol_name,
                  Symbol data,
                  dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                  bool keepdims = false,
                  bool exclude = false) {
  return Operator("max")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the min of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L205
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol min(const std::string& symbol_name,
                  Symbol data,
                  dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                  bool keepdims = false,
                  bool exclude = false) {
  return Operator("min")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Broadcasts the input array over particular axes.
 *
 *        Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
 *        `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.
 *
 *        Example::
 *
 *        // given x of shape (1,2,1)
 *        x = [[[ 1.],
 *        [ 2.]]]
 *
 *        // broadcast x on on axis 2
 *        broadcast_axis(x, axis=2, size=3) = [[[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]]
 *        // broadcast x on on axes 0 and 2
 *        broadcast_axis(x, axis=(0,2), size=(2,3)) = [[[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]],
 *        [[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]]
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L238
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param axis The axes to perform the broadcasting.
 * \param size Target sizes of the broadcasting axes.
 * \return new symbol
 */
inline Symbol broadcast_axis(const std::string& symbol_name,
                             Symbol data,
                             Shape axis = {},
                             Shape size = {}) {
  return Operator("broadcast_axis")
           .SetParam("axis", axis)
           .SetParam("size", size)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Broadcasts the input array to a new shape.
 *
 *        Broadcasting is a mechanism that allows NDArrays to perform arithmetic
 *        with arrays of different shapes efficiently without creating multiple copies of
 *        Also see, `Broadcasting
 *        <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
 *
 *        Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
 *        `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.
 *
 *        For example::
 *
 *        broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1.,  2.,  3.],
 *        [ 1.,  2.,  3.]])
 *
 *        The dimension which you do not want to change can also be kept as `0` which
 *        So with `shape=(2,0)`, we will obtain the same result as in the above example.
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L262
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param shape The shape of the desired array. We can set the dim to zero if it's same
 *        as the original. E.g `A = broadcast_to(B, shape=(10, 0, 0))` has the same
 * \return new symbol
 */
inline Symbol broadcast_to(const std::string& symbol_name,
                           Symbol data,
                           Shape shape = {}) {
  return Operator("broadcast_to")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _broadcast_backward(const std::string& symbol_name) {
  return Operator("_broadcast_backward")
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Broadcasts lhs to have the same shape as rhs.
 *
 *        Broadcasting is a mechanism that allows NDArrays to perform arithmetic
 *        with arrays of different shapes efficiently without creating multiple copies of
 *        Also see, `Broadcasting
 *        <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
 *
 *        Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
 *        `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.
 *
 *        For example::
 *
 *        broadcast_like([[1,2,3]], [[5,6,7],[7,8,9]]) = [[ 1.,  2.,  3.],
 *        [ 1.,  2.,  3.]])
 *
 *        broadcast_like([9], [1,2,3,4,5], lhs_axes=(0,), rhs_axes=(-1,)) = [9,9,9,9,9]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L315
 * \param symbol_name name of the resulting symbol
 * \param lhs First input.
 * \param rhs Second input.
 * \param lhs_axes Axes to perform broadcast on in the first input array
 * \param rhs_axes Axes to copy from the second input array
 * \return new symbol
 */
inline Symbol broadcast_like(const std::string& symbol_name,
                             Symbol lhs,
                             Symbol rhs,
                             dmlc::optional<Shape> lhs_axes = dmlc::optional<Shape>(),
                             dmlc::optional<Shape> rhs_axes = dmlc::optional<Shape>()) {
  return Operator("broadcast_like")
           .SetParam("lhs_axes", lhs_axes)
           .SetParam("rhs_axes", rhs_axes)
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*! \brief The data type of the output.
 */
enum class NormOutDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3,
  kInt32 = 4,
  kInt64 = 5,
  kInt8 = 6
};

/*!
 * \brief Computes the norm on an NDArray.
 *
 *        This operator computes the norm on an NDArray with the specified axis, depending
 *        on the value of the ord parameter. By default, it computes the L2 norm on the
 *        array. Currently only ord=2 supports sparse ndarrays.
 *
 *        Examples::
 *
 *        x = [[[1, 2],
 *        [3, 4]],
 *        [[2, 2],
 *        [5, 6]]]
 *
 *        norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ]
 *        [5.3851647 6.3245554]]
 *
 *        norm(x, ord=1, axis=1) = [[4., 6.],
 *        [7., 8.]]
 *
 *        rsp = x.cast_storage('row_sparse')
 *
 *        norm(rsp) = [5.47722578]
 *
 *        csr = x.cast_storage('csr')
 *
 *        norm(csr) = [5.47722578]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L350
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param ord Order of the norm. Currently ord=1 and ord=2 is supported.
 * \param axis The axis or axes along which to perform the reduction.
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *        If `axis` is int, a reduction is performed on a particular axis.
 *        If `axis` is a 2-tuple, it specifies the axes that hold 2-D matrices,
 *        and the matrix norms of these matrices are computed.
 * \param out_dtype The data type of the output.
 * \param keepdims If this is set to `True`, the reduced axis is left in the result as
 * \return new symbol
 */
inline Symbol norm(const std::string& symbol_name,
                   Symbol data,
                   int ord = 2,
                   dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                   NormOutDtype out_dtype = NormOutDtype::kNone,
                   bool keepdims = false) {
  static const char *NormOutDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8"
  };
  return Operator("norm")
           .SetParam("ord", ord)
           .SetParam("axis", axis)
           .SetParam("out_dtype", NormOutDtypeValues[int(out_dtype)])
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _equal(const std::string& symbol_name,
                     Symbol lhs,
                     Symbol rhs) {
  return Operator("_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _not_equal(const std::string& symbol_name,
                         Symbol lhs,
                         Symbol rhs) {
  return Operator("_not_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _greater(const std::string& symbol_name,
                       Symbol lhs,
                       Symbol rhs) {
  return Operator("_greater")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _greater_equal(const std::string& symbol_name,
                             Symbol lhs,
                             Symbol rhs) {
  return Operator("_greater_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _lesser(const std::string& symbol_name,
                      Symbol lhs,
                      Symbol rhs) {
  return Operator("_lesser")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _lesser_equal(const std::string& symbol_name,
                            Symbol lhs,
                            Symbol rhs) {
  return Operator("_lesser_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _logical_and(const std::string& symbol_name,
                           Symbol lhs,
                           Symbol rhs) {
  return Operator("_logical_and")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _logical_or(const std::string& symbol_name,
                          Symbol lhs,
                          Symbol rhs) {
  return Operator("_logical_or")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _logical_xor(const std::string& symbol_name,
                           Symbol lhs,
                           Symbol rhs) {
  return Operator("_logical_xor")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Return the elements, either from x or y, depending on the condition.
 *
 *        Given three ndarrays, condition, x, and y, return an ndarray with the elements
 *        depending on the elements from condition are true or false. x and y must have
 *        If condition has the same shape as x, each element in the output array is from
 *        corresponding element in the condition is true, and from y if false.
 *
 *        If condition does not have the same shape as x, it must be a 1D array whose
 *        the same as x's first dimension size. Each row of the output array is from x's
 *        if the corresponding element from condition is true, and from y's row if false.
 *
 *        Note that all non-zero values are interpreted as ``True`` in condition.
 *
 *        Examples::
 *
 *        x = [[1, 2], [3, 4]]
 *        y = [[5, 6], [7, 8]]
 *        cond = [[0, 1], [-1, 0]]
 *
 *        where(cond, x, y) = [[5, 2], [3, 8]]
 *
 *        csr_cond = cast_storage(cond, 'csr')
 *
 *        where(csr_cond, x, y) = [[5, 2], [3, 8]]
 *
 *
 *
 *        Defined in src/operator/tensor/control_flow_op.cc:L57
 * \param symbol_name name of the resulting symbol
 * \param condition condition array
 * \param x
 * \param y
 * \return new symbol
 */
inline Symbol where(const std::string& symbol_name,
                    Symbol condition,
                    Symbol x,
                    Symbol y) {
  return Operator("where")
           .SetInput("condition", condition)
           .SetInput("x", x)
           .SetInput("y", y)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _maximum_scalar(const std::string& symbol_name,
                              Symbol data,
                              mx_float scalar) {
  return Operator("_maximum_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _minimum_scalar(const std::string& symbol_name,
                              Symbol data,
                              mx_float scalar) {
  return Operator("_minimum_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _power_scalar(const std::string& symbol_name,
                            Symbol data,
                            mx_float scalar) {
  return Operator("_power_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rpower_scalar(const std::string& symbol_name,
                             Symbol data,
                             mx_float scalar) {
  return Operator("_rpower_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _hypot_scalar(const std::string& symbol_name,
                            Symbol data,
                            mx_float scalar) {
  return Operator("_hypot_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Calculate Smooth L1 Loss(lhs, scalar) by summing
 *
 *        .. math::
 *
 *        f(x) =
 *        \begin{cases}
 *        (\sigma x)^2/2,& \text{if }x < 1/\sigma^2\\
 *        |x|-0.5/\sigma^2,& \text{otherwise}
 *        \end{cases}
 *
 *        where :math:`x` is an element of the tensor *lhs* and :math:`\sigma` is the
 *
 *        Example::
 *
 *        smooth_l1([1, 2, 3, 4]) = [0.5, 1.5, 2.5, 3.5]
 *        smooth_l1([1, 2, 3, 4], scalar=1) = [0.5, 1.5, 2.5, 3.5]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_scalar_op_extended.cc:L104
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol smooth_l1(const std::string& symbol_name,
                        Symbol data,
                        mx_float scalar) {
  return Operator("smooth_l1")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Reshapes the input array.
 *
 *        .. note:: ``Reshape`` is deprecated, use ``reshape``
 *
 *        Given an array and a shape, this function returns a copy of the array in the
 *        The shape is a tuple of integers such as (2,3,4). The size of the new shape
 *
 *        Example::
 *
 *        reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]]
 *
 *        Some dimensions of the shape can take special values from the set {0, -1, -2,
 *
 *        - ``0``  copy this dimension from the input to the output shape.
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2)
 *        - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4)
 *
 *        - ``-1`` infers the dimension of the output shape by using the remainder of the
 *        keeping the size of the new array same as that of the input array.
 *        At most one dimension of shape can be -1.
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4)
 *        - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8)
 *        - input shape = (2,3,4), shape=(-1,), output shape = (24,)
 *
 *        - ``-2`` copy all/remainder of the input dimensions to the output shape.
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4)
 *        - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4)
 *        - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1)
 *
 *        - ``-3`` use the product of two consecutive dimensions of the input shape as
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (-3,4), output shape = (6,4)
 *        - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20)
 *        - input shape = (2,3,4), shape = (0,-3), output shape = (2,12)
 *        - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4)
 *
 *        - ``-4`` split one dimension of the input into two dimensions passed subsequent
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (-4,1,2,-2), output shape =(1,2,3,4)
 *        - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4)
 *
 *        If the argument `reverse` is set to 1, then the special values are inferred
 *
 *        Example::
 *
 *        - without reverse=1, for input shape = (10,5,4), shape = (-1,0), output shape
 *        - with reverse=1, output shape will be (50,4).
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L202
 * \param symbol_name name of the resulting symbol
 * \param data Input data to reshape.
 * \param shape The target shape
 * \param reverse If true then the special values are inferred from right to left
 * \param target_shape (Deprecated! Use ``shape`` instead.) Target new shape. One and
 * \param keep_highest (Deprecated! Use ``shape`` instead.) Whether keep the highest dim
 *        unchanged.If set to true, then the first dim in target_shape is ignored,and
 * \return new symbol
 */
inline Symbol Reshape(const std::string& symbol_name,
                      Symbol data,
                      Shape shape = {},
                      bool reverse = false,
                      Shape target_shape = {},
                      bool keep_highest = false) {
  return Operator("Reshape")
           .SetParam("shape", shape)
           .SetParam("reverse", reverse)
           .SetParam("target_shape", target_shape)
           .SetParam("keep_highest", keep_highest)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Flattens the input array into a 2-D array by collapsing the higher dimensions.
 *
 *        .. note:: `Flatten` is deprecated. Use `flatten` instead.
 *
 *        For an input array with shape ``(d1, d2, ..., dk)``, `flatten` operation
 *        the input array into an output array of shape ``(d1, d2*...*dk)``.
 *
 *        Note that the bahavior of this function is different from numpy.ndarray.flatten,
 *        which behaves similar to mxnet.ndarray.reshape((-1,)).
 *
 *        Example::
 *
 *        x = [[
 *        [1,2,3],
 *        [4,5,6],
 *        [7,8,9]
 *        ],
 *        [    [1,2,3],
 *        [4,5,6],
 *        [7,8,9]
 *        ]],
 *
 *        flatten(x) = [[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
 *        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L291
 * \param symbol_name name of the resulting symbol
 * \param data Input array.
 * \return new symbol
 */
inline Symbol Flatten(const std::string& symbol_name,
                      Symbol data) {
  return Operator("Flatten")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Permutes the dimensions of an array.
 *
 *        Examples::
 *
 *        x = [[ 1, 2],
 *        [ 3, 4]]
 *
 *        transpose(x) = [[ 1.,  3.],
 *        [ 2.,  4.]]
 *
 *        x = [[[ 1.,  2.],
 *        [ 3.,  4.]],
 *
 *        [[ 5.,  6.],
 *        [ 7.,  8.]]]
 *
 *        transpose(x) = [[[ 1.,  5.],
 *        [ 3.,  7.]],
 *
 *        [[ 2.,  6.],
 *        [ 4.,  8.]]]
 *
 *        transpose(x, axes=(1,0,2)) = [[[ 1.,  2.],
 *        [ 5.,  6.]],
 *
 *        [[ 3.,  4.],
 *        [ 7.,  8.]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L375
 * \param symbol_name name of the resulting symbol
 * \param data Source input
 * \param axes Target axis order. By default the axes will be inverted.
 * \return new symbol
 */
inline Symbol transpose(const std::string& symbol_name,
                        Symbol data,
                        Shape axes = {}) {
  return Operator("transpose")
           .SetParam("axes", axes)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Inserts a new axis of size 1 into the array shape
 *
 *        For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)``
 *        will return a new array with shape ``(2,1,3,4)``.
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L416
 * \param symbol_name name of the resulting symbol
 * \param data Source input
 * \param axis Position where new axis is to be inserted. Suppose that the input
 *        `NDArray`'s dimension is `ndim`, the range of the inserted axis is `[-ndim,
 * \return new symbol
 */
inline Symbol expand_dims(const std::string& symbol_name,
                          Symbol data,
                          int axis) {
  return Operator("expand_dims")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Slices a region of the array.
 *
 *        .. note:: ``crop`` is deprecated. Use ``slice`` instead.
 *
 *        This function returns a sliced array between the indices given
 *        by `begin` and `end` with the corresponding `step`.
 *
 *        For an input array of ``shape=(d_0, d_1, ..., d_n-1)``,
 *        slice operation with ``begin=(b_0, b_1...b_m-1)``,
 *        ``end=(e_0, e_1, ..., e_m-1)``, and ``step=(s_0, s_1, ..., s_m-1)``,
 *        where m <= n, results in an array with the shape
 *        ``(|e_0-b_0|/|s_0|, ..., |e_m-1-b_m-1|/|s_m-1|, d_m, ..., d_n-1)``.
 *
 *        The resulting array's *k*-th dimension contains elements
 *        from the *k*-th dimension of the input array starting
 *        from index ``b_k`` (inclusive) with step ``s_k``
 *        until reaching ``e_k`` (exclusive).
 *
 *        If the *k*-th elements are `None` in the sequence of `begin`, `end`,
 *        and `step`, the following rule will be used to set default values.
 *        If `s_k` is `None`, set `s_k=1`. If `s_k > 0`, set `b_k=0`, `e_k=d_k`;
 *        else, set `b_k=d_k-1`, `e_k=-1`.
 *
 *        The storage type of ``slice`` output depends on storage types of inputs
 *
 *        - slice(csr) = csr
 *        - otherwise, ``slice`` generates output with default storage
 *
 *        .. note:: When input data storage type is csr, it only supports
 *        step=(), or step=(None,), or step=(1,) to generate a csr output.
 *        For other step parameter values, it falls back to slicing
 *        a dense tensor.
 *
 *        Example::
 *
 *        x = [[  1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        slice(x, begin=(0,1), end=(2,4)) = [[ 2.,  3.,  4.],
 *        [ 6.,  7.,  8.]]
 *        slice(x, begin=(None, 0), end=(None, 3), step=(-1, 2)) = [[9., 11.],
 *        [5.,  7.],
 *        [1.,  3.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L506
 * \param symbol_name name of the resulting symbol
 * \param data Source input
 * \param begin starting indices for the slice operation, supports negative indices.
 * \param end ending indices for the slice operation, supports negative indices.
 * \param step step for the slice operation, supports negative values.
 * \return new symbol
 */
inline Symbol slice(const std::string& symbol_name,
                    Symbol data,
                    Shape begin,
                    Shape end,
                    Shape step = {}) {
  return Operator("slice")
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetParam("step", step)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Assign the rhs to a cropped subset of lhs.
 *
 *        Requirements
 *        ------------
 *        - output should be explicitly given and be the same as lhs.
 *        - lhs and rhs are of the same data type, and on the same device.
 *
 *
 *        From:src/operator/tensor/matrix_op.cc:531
 * \param symbol_name name of the resulting symbol
 * \param lhs Source input
 * \param rhs value to assign
 * \param begin starting indices for the slice operation, supports negative indices.
 * \param end ending indices for the slice operation, supports negative indices.
 * \param step step for the slice operation, supports negative values.
 * \return new symbol
 */
inline Symbol _slice_assign(const std::string& symbol_name,
                            Symbol lhs,
                            Symbol rhs,
                            Shape begin,
                            Shape end,
                            Shape step = {}) {
  return Operator("_slice_assign")
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetParam("step", step)
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief (Assign the scalar to a cropped subset of the input.
 *
 *        Requirements
 *        ------------
 *        - output should be explicitly given and be the same as input
 *        )
 *
 *        From:src/operator/tensor/matrix_op.cc:556
 * \param symbol_name name of the resulting symbol
 * \param data Source input
 * \param begin starting indices for the slice operation, supports negative indices.
 * \param end ending indices for the slice operation, supports negative indices.
 * \param scalar The scalar value for assignment.
 * \param step step for the slice operation, supports negative values.
 * \return new symbol
 */
inline Symbol _slice_assign_scalar(const std::string& symbol_name,
                                   Symbol data,
                                   Shape begin,
                                   Shape end,
                                   double scalar = 0,
                                   Shape step = {}) {
  return Operator("_slice_assign_scalar")
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetParam("scalar", scalar)
           .SetParam("step", step)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Slices along a given axis.
 *
 *        Returns an array slice along a given `axis` starting from the `begin` index
 *        to the `end` index.
 *
 *        Examples::
 *
 *        x = [[  1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        slice_axis(x, axis=0, begin=1, end=3) = [[  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        slice_axis(x, axis=1, begin=0, end=2) = [[  1.,   2.],
 *        [  5.,   6.],
 *        [  9.,  10.]]
 *
 *        slice_axis(x, axis=1, begin=-3, end=-1) = [[  2.,   3.],
 *        [  6.,   7.],
 *        [ 10.,  11.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L596
 * \param symbol_name name of the resulting symbol
 * \param data Source input
 * \param axis Axis along which to be sliced, supports negative indexes.
 * \param begin The beginning index along the axis to be sliced,  supports negative
 * \param end The ending index along the axis to be sliced,  supports negative indexes.
 * \return new symbol
 */
inline Symbol slice_axis(const std::string& symbol_name,
                         Symbol data,
                         int axis,
                         int begin,
                         dmlc::optional<int> end) {
  return Operator("slice_axis")
           .SetParam("axis", axis)
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Slices a region of the array like the shape of another array.
 *
 *        This function is similar to ``slice``, however, the `begin` are always `0`s
 *        and `end` of specific axes are inferred from the second input `shape_like`.
 *
 *        Given the second `shape_like` input of ``shape=(d_0, d_1, ..., d_n-1)``,
 *        a ``slice_like`` operator with default empty `axes`, it performs the
 *        following operation:
 *
 *        `` out = slice(input, begin=(0, 0, ..., 0), end=(d_0, d_1, ..., d_n-1))``.
 *
 *        When `axes` is not empty, it is used to speficy which axes are being sliced.
 *
 *        Given a 4-d input data, ``slice_like`` operator with ``axes=(0, 2, -1)``
 *        will perform the following operation:
 *
 *        `` out = slice(input, begin=(0, 0, 0, 0), end=(d_0, None, d_2, d_3))``.
 *
 *        Note that it is allowed to have first and second input with different
 *        however, you have to make sure the `axes` are specified and not exceeding the
 *        dimension limits.
 *
 *        For example, given `input_1` with ``shape=(2,3,4,5)`` and `input_2` with
 *        ``shape=(1,2,3)``, it is not allowed to use:
 *
 *        `` out = slice_like(a, b)`` because ndim of `input_1` is 4, and ndim of
 *        is 3.
 *
 *        The following is allowed in this situation:
 *
 *        `` out = slice_like(a, b, axes=(0, 2))``
 *
 *        Example::
 *
 *        x = [[  1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        y = [[  0.,   0.,   0.],
 *        [  0.,   0.,   0.]]
 *
 *        slice_like(x, y) = [[ 1.,  2.,  3.]
 *        [ 5.,  6.,  7.]]
 *        slice_like(x, y, axes=(0, 1)) = [[ 1.,  2.,  3.]
 *        [ 5.,  6.,  7.]]
 *        slice_like(x, y, axes=(0)) = [[ 1.,  2.,  3.,  4.]
 *        [ 5.,  6.,  7.,  8.]]
 *        slice_like(x, y, axes=(-1)) = [[  1.,   2.,   3.]
 *        [  5.,   6.,   7.]
 *        [  9.,  10.,  11.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L665
 * \param symbol_name name of the resulting symbol
 * \param data Source input
 * \param shape_like Shape like input
 * \param axes List of axes on which input data will be sliced according to the
 *        corresponding size of the second input. By default will slice on all axes.
 * \return new symbol
 */
inline Symbol slice_like(const std::string& symbol_name,
                         Symbol data,
                         Symbol shape_like,
                         Shape axes = {}) {
  return Operator("slice_like")
           .SetParam("axes", axes)
           .SetInput("data", data)
           .SetInput("shape_like", shape_like)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Clips (limits) the values in an array.
 *
 *        Given an interval, values outside the interval are clipped to the interval
 *        Clipping ``x`` between `a_min` and `a_x` would be::
 *
 *        clip(x, a_min, a_max) = max(min(x, a_max), a_min))
 *
 *        Example::
 *
 *        x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 *
 *        clip(x,1,8) = [ 1.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.]
 *
 *        The storage type of ``clip`` output depends on storage types of inputs and the
 *        parameter values:
 *
 *        - clip(default) = default
 *        - clip(row_sparse, a_min <= 0, a_max >= 0) = row_sparse
 *        - clip(csr, a_min <= 0, a_max >= 0) = csr
 *        - clip(row_sparse, a_min < 0, a_max < 0) = default
 *        - clip(row_sparse, a_min > 0, a_max > 0) = default
 *        - clip(csr, a_min < 0, a_max < 0) = csr
 *        - clip(csr, a_min > 0, a_max > 0) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L723
 * \param symbol_name name of the resulting symbol
 * \param data Input array.
 * \param a_min Minimum value
 * \param a_max Maximum value
 * \return new symbol
 */
inline Symbol clip(const std::string& symbol_name,
                   Symbol data,
                   mx_float a_min,
                   mx_float a_max) {
  return Operator("clip")
           .SetParam("a_min", a_min)
           .SetParam("a_max", a_max)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Repeats elements of an array.
 *
 *        By default, ``repeat`` flattens the input array into 1-D and then repeats the
 *        elements::
 *
 *        x = [[ 1, 2],
 *        [ 3, 4]]
 *
 *        repeat(x, repeats=2) = [ 1.,  1.,  2.,  2.,  3.,  3.,  4.,  4.]
 *
 *        The parameter ``axis`` specifies the axis along which to perform repeat::
 *
 *        repeat(x, repeats=2, axis=1) = [[ 1.,  1.,  2.,  2.],
 *        [ 3.,  3.,  4.,  4.]]
 *
 *        repeat(x, repeats=2, axis=0) = [[ 1.,  2.],
 *        [ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 3.,  4.]]
 *
 *        repeat(x, repeats=2, axis=-1) = [[ 1.,  1.,  2.,  2.],
 *        [ 3.,  3.,  4.,  4.]]
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L796
 * \param symbol_name name of the resulting symbol
 * \param data Input data array
 * \param repeats The number of repetitions for each element.
 * \param axis The axis along which to repeat values. The negative numbers are
 *        interpreted counting from the backward. By default, use the flattened input
 * \return new symbol
 */
inline Symbol repeat(const std::string& symbol_name,
                     Symbol data,
                     int repeats,
                     dmlc::optional<int> axis = dmlc::optional<int>()) {
  return Operator("repeat")
           .SetParam("repeats", repeats)
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Repeats the whole array multiple times.
 *
 *        If ``reps`` has length *d*, and input array has dimension of *n*. There are
 *        three cases:
 *
 *        - **n=d**. Repeat *i*-th dimension of the input by ``reps[i]`` times::
 *
 *        x = [[1, 2],
 *        [3, 4]]
 *
 *        tile(x, reps=(2,3)) = [[ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.],
 *        [ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.]]
 *
 *        - **n>d**. ``reps`` is promoted to length *n* by pre-pending 1's to it. Thus for
 *        an input shape ``(2,3)``, ``repos=(2,)`` is treated as ``(1,2)``::
 *
 *
 *        tile(x, reps=(2,)) = [[ 1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.]]
 *
 *        - **n<d**. The input is promoted to be d-dimensional by prepending new axes. So
 *        shape ``(2,2)`` array is promoted to ``(1,2,2)`` for 3-D replication::
 *
 *        tile(x, reps=(2,2,3)) = [[[ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.],
 *        [ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.]],
 *
 *        [[ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.],
 *        [ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L857
 * \param symbol_name name of the resulting symbol
 * \param data Input data array
 * \param reps The number of times for repeating the tensor a. Each dim size of reps must
 *        be a positive integer. If reps has length d, the result will have dimension of
 *        max(d, a.ndim); If a.ndim < d, a is promoted to be d-dimensional by prepending
 * \return new symbol
 */
inline Symbol tile(const std::string& symbol_name,
                   Symbol data,
                   Shape reps) {
  return Operator("tile")
           .SetParam("reps", reps)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Reverses the order of elements along given axis while preserving array shape.
 *
 *        Note: reverse and flip are equivalent. We use reverse in the following examples.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.,  3.,  4.],
 *        [ 5.,  6.,  7.,  8.,  9.]]
 *
 *        reverse(x, axis=0) = [[ 5.,  6.,  7.,  8.,  9.],
 *        [ 0.,  1.,  2.,  3.,  4.]]
 *
 *        reverse(x, axis=1) = [[ 4.,  3.,  2.,  1.,  0.],
 *        [ 9.,  8.,  7.,  6.,  5.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L898
 * \param symbol_name name of the resulting symbol
 * \param data Input data array
 * \param axis The axis which to reverse elements.
 * \return new symbol
 */
inline Symbol reverse(const std::string& symbol_name,
                      Symbol data,
                      Shape axis) {
  return Operator("reverse")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Join a sequence of arrays along a new axis.
 *
 *        The axis parameter specifies the index of the new axis in the dimensions of the
 *        result. For example, if axis=0 it will be the first dimension and if axis=-1 it
 *        will be the last dimension.
 *
 *        Examples::
 *
 *        x = [1, 2]
 *        y = [3, 4]
 *
 *        stack(x, y) = [[1, 2],
 *        [3, 4]]
 *        stack(x, y, axis=1) = [[1, 3],
 *        [2, 4]]
 *
 * \param symbol_name name of the resulting symbol
 * \param data List of arrays to stack
 * \param num_args Number of inputs to be stacked.
 * \param axis The axis in the result array along which the input arrays are stacked.
 * \return new symbol
 */
inline Symbol stack(const std::string& symbol_name,
                    const std::vector<Symbol>& data,
                    int num_args,
                    int axis = 0) {
  return Operator("stack")
           .SetParam("num_args", num_args)
           .SetParam("axis", axis)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Remove single-dimensional entries from the shape of an array.
 *        Same behavior of defining the output tensor shape as numpy.squeeze for the most
 *        See the following note for exception.
 *
 *        Examples::
 *
 *        data = [[[0], [1], [2]]]
 *        squeeze(data) = [0, 1, 2]
 *        squeeze(data, axis=0) = [[0], [1], [2]]
 *        squeeze(data, axis=2) = [[0, 1, 2]]
 *        squeeze(data, axis=(0, 2)) = [0, 1, 2]
 *
 *        .. Note::
 *        The output of this operator will keep at least one dimension not removed. For
 *        squeeze([[[4]]]) = [4], while in numpy.squeeze, the output will become a scalar.
 *
 * \param symbol_name name of the resulting symbol
 * \param data data to squeeze
 * \param axis Selects a subset of the single-dimensional entries in the shape. If an
 * \return new symbol
 */
inline Symbol squeeze(const std::string& symbol_name,
                      const std::vector<Symbol>& data,
                      dmlc::optional<Shape> axis = dmlc::optional<Shape>()) {
  return Operator("squeeze")
           .SetParam("axis", axis)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Rearranges(permutes) data from depth into blocks of spatial data.
 *        Similar to ONNX DepthToSpace operator:
 *        https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace.
 *        The output is a new tensor where the values from depth dimension are moved in
 *        to height and width dimension. The reverse of this operation is
 *
 *        .. math::
 *
 *        \begin{gather*}
 *        x \prime = reshape(x, [N, block\_size, block\_size, C / (block\_size ^ 2), H *
 *        x \prime \prime = transpose(x \prime, [0, 3, 4, 1, 5, 2]) \\
 *        y = reshape(x \prime \prime, [N, C / (block\_size ^ 2), H * block\_size, W *
 *        \end{gather*}
 *
 *        where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`:
 *        and :math:`y` is the output tensor of layout :math:`[N, C / (block\_size ^ 2),
 *
 *        Example::
 *
 *        x = [[[[0, 1, 2],
 *        [3, 4, 5]],
 *        [[6, 7, 8],
 *        [9, 10, 11]],
 *        [[12, 13, 14],
 *        [15, 16, 17]],
 *        [[18, 19, 20],
 *        [21, 22, 23]]]]
 *
 *        depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8],
 *        [12, 18, 13, 19, 14, 20],
 *        [3, 9, 4, 10, 5, 11],
 *        [15, 21, 16, 22, 17, 23]]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L1050
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param block_size Blocks of [block_size. block_size] are moved
 * \return new symbol
 */
inline Symbol depth_to_space(const std::string& symbol_name,
                             Symbol data,
                             int block_size) {
  return Operator("depth_to_space")
           .SetParam("block_size", block_size)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Rearranges(permutes) blocks of spatial data into depth.
 *        Similar to ONNX SpaceToDepth operator:
 *        https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth
 *
 *        The output is a new tensor where the values from height and width dimension are
 *        moved to the depth dimension. The reverse of this operation is
 *
 *        .. math::
 *
 *        \begin{gather*}
 *        x \prime = reshape(x, [N, C, H / block\_size, block\_size, W / block\_size,
 *        x \prime \prime = transpose(x \prime, [0, 3, 5, 1, 2, 4]) \\
 *        y = reshape(x \prime \prime, [N, C * (block\_size ^ 2), H / block\_size, W /
 *        \end{gather*}
 *
 *        where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`:
 *        and :math:`y` is the output tensor of layout :math:`[N, C * (block\_size ^ 2),
 *
 *        Example::
 *
 *        x = [[[[0, 6, 1, 7, 2, 8],
 *        [12, 18, 13, 19, 14, 20],
 *        [3, 9, 4, 10, 5, 11],
 *        [15, 21, 16, 22, 17, 23]]]]
 *
 *
 *        space_to_depth(x, 2) = [[[[0, 1, 2],
 *        [3, 4, 5]],
 *        [[6, 7, 8],
 *        [9, 10, 11]],
 *        [[12, 13, 14],
 *        [15, 16, 17]],
 *        [[18, 19, 20],
 *        [21, 22, 23]]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L1104
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param block_size Blocks of [block_size. block_size] are moved
 * \return new symbol
 */
inline Symbol space_to_depth(const std::string& symbol_name,
                             Symbol data,
                             int block_size) {
  return Operator("space_to_depth")
           .SetParam("block_size", block_size)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Splits an array along a particular axis into multiple sub-arrays.
 *
 *        Example::
 *
 *        x  = [[[ 1.]
 *        [ 2.]]
 *        [[ 3.]
 *        [ 4.]]
 *        [[ 5.]
 *        [ 6.]]]
 *        x.shape = (3, 2, 1)
 *
 *        y = split_v2(x, axis=1, indices_or_sections=2) // a list of 2 arrays with shape
 *        y = [[[ 1.]]
 *        [[ 3.]]
 *        [[ 5.]]]
 *
 *        [[[ 2.]]
 *        [[ 4.]]
 *        [[ 6.]]]
 *
 *        y[0].shape = (3, 1, 1)
 *
 *        z = split_v2(x, axis=0, indices_or_sections=3) // a list of 3 arrays with shape
 *        z = [[[ 1.]
 *        [ 2.]]]
 *
 *        [[[ 3.]
 *        [ 4.]]]
 *
 *        [[[ 5.]
 *        [ 6.]]]
 *
 *        z[0].shape = (1, 2, 1)
 *
 *        w = split_v2(x, axis=0, indices_or_sections=(1,)) // a list of 2 arrays with
 *        w = [[[ 1.]
 *        [ 2.]]]
 *
 *        [[[3.]
 *        [4.]]
 *
 *        [[5.]
 *        [6.]]]
 *
 *        w[0].shape = (1, 2, 1)
 *        w[1].shape = (2, 2, 1)
 *
 *        `squeeze_axis=True` removes the axis with length 1 from the shapes of the
 *        **Note** that setting `squeeze_axis` to ``1`` removes axis with length 1 only
 *        along the `axis` which it is split.
 *        Also `squeeze_axis` can be set to true only if ``input.shape[axis] ==
 *
 *        Example::
 *
 *        z = split_v2(x, axis=0, indices_or_sections=3, squeeze_axis=1) // a list of 3
 *        z = [[ 1.]
 *        [ 2.]]
 *
 *        [[ 3.]
 *        [ 4.]]
 *
 *        [[ 5.]
 *        [ 6.]]
 *        z[0].shape = (2, 1)
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L1190
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param indices Indices of splits. The elements should denote the boundaries of at
 * \param axis Axis along which to split.
 * \param squeeze_axis If true, Removes the axis with length 1 from the shapes of the
 *        output arrays. **Note** that setting `squeeze_axis` to ``true`` removes axis
 *        with length 1 only along the `axis` which it is split. Also `squeeze_axis` can
 * \param sections Number of sections if equally splitted. Default to 0 which means split
 * \return new symbol
 */
inline Symbol _split_v2(const std::string& symbol_name,
                        Symbol data,
                        Shape indices,
                        int axis = 1,
                        bool squeeze_axis = false,
                        int sections = 0) {
  return Operator("_split_v2")
           .SetParam("indices", indices)
           .SetParam("axis", axis)
           .SetParam("squeeze_axis", squeeze_axis)
           .SetParam("sections", sections)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _split_v2_backward(const std::string& symbol_name) {
  return Operator("_split_v2_backward")
           .CreateSymbol(symbol_name);
}

/*! \brief Output storage type.
 */
enum class Cast_storageStype {
  kCsr = 0,
  kDefault = 1,
  kRow_sparse = 2
};

/*!
 * \brief Casts tensor storage type to the new type.
 *
 *        When an NDArray with default storage type is cast to csr or row_sparse storage,
 *        the result is compact, which means:
 *
 *        - for csr, zero values will not be retained
 *        - for row_sparse, row slices of all zeros will not be retained
 *
 *        The storage type of ``cast_storage`` output depends on stype parameter:
 *
 *        - cast_storage(csr, 'default') = default
 *        - cast_storage(row_sparse, 'default') = default
 *        - cast_storage(default, 'csr') = csr
 *        - cast_storage(default, 'row_sparse') = row_sparse
 *        - cast_storage(csr, 'csr') = csr
 *        - cast_storage(row_sparse, 'row_sparse') = row_sparse
 *
 *        Example::
 *
 *        dense = [[ 0.,  1.,  0.],
 *        [ 2.,  0.,  3.],
 *        [ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *        # cast to row_sparse storage type
 *        rsp = cast_storage(dense, 'row_sparse')
 *        rsp.indices = [0, 1]
 *        rsp.values = [[ 0.,  1.,  0.],
 *        [ 2.,  0.,  3.]]
 *
 *        # cast to csr storage type
 *        csr = cast_storage(dense, 'csr')
 *        csr.indices = [1, 0, 2]
 *        csr.values = [ 1.,  2.,  3.]
 *        csr.indptr = [0, 1, 3, 3, 3]
 *
 *
 *
 *        Defined in src/operator/tensor/cast_storage.cc:L71
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param stype Output storage type.
 * \return new symbol
 */
inline Symbol cast_storage(const std::string& symbol_name,
                           Symbol data,
                           Cast_storageStype stype) {
  static const char *Cast_storageStypeValues[] = {
    "csr",
    "default",
    "row_sparse"
  };
  return Operator("cast_storage")
           .SetParam("stype", Cast_storageStypeValues[int(stype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Converts a batch of index arrays into an array of flat indices. The operator
 *        follows numpy conventions so a single multi index is given by a column of the
 *        input matrix. The leading dimension may be left unspecified by using -1 as
 *
 *        Examples::
 *
 *        A = [[3,6,6],[4,5,1]]
 *        ravel(A, shape=(7,6)) = [22,41,37]
 *        ravel(A, shape=(-1,6)) = [22,41,37]
 *
 *
 *
 *        Defined in src/operator/tensor/ravel.cc:L42
 * \param symbol_name name of the resulting symbol
 * \param data Batch of multi-indices
 * \param shape Shape of the array into which the multi-indices apply.
 * \return new symbol
 */
inline Symbol _ravel_multi_index(const std::string& symbol_name,
                                 Symbol data,
                                 Shape shape = Shape()) {
  return Operator("_ravel_multi_index")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Converts an array of flat indices into a batch of index arrays. The operator
 *        follows numpy conventions so a single multi index is given by a column of the
 *        output matrix. The leading dimension may be left unspecified by using -1 as
 *
 *        Examples::
 *
 *        A = [22,41,37]
 *        unravel(A, shape=(7,6)) = [[3,6,6],[4,5,1]]
 *        unravel(A, shape=(-1,6)) = [[3,6,6],[4,5,1]]
 *
 *
 *
 *        Defined in src/operator/tensor/ravel.cc:L67
 * \param symbol_name name of the resulting symbol
 * \param data Array of flat indices
 * \param shape Shape of the array into which the multi-indices apply.
 * \return new symbol
 */
inline Symbol _unravel_index(const std::string& symbol_name,
                             Symbol data,
                             Shape shape = Shape()) {
  return Operator("_unravel_index")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _equal_scalar(const std::string& symbol_name,
                            Symbol data,
                            mx_float scalar) {
  return Operator("_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _not_equal_scalar(const std::string& symbol_name,
                                Symbol data,
                                mx_float scalar) {
  return Operator("_not_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _greater_scalar(const std::string& symbol_name,
                              Symbol data,
                              mx_float scalar) {
  return Operator("_greater_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _greater_equal_scalar(const std::string& symbol_name,
                                    Symbol data,
                                    mx_float scalar) {
  return Operator("_greater_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _lesser_scalar(const std::string& symbol_name,
                             Symbol data,
                             mx_float scalar) {
  return Operator("_lesser_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _lesser_equal_scalar(const std::string& symbol_name,
                                   Symbol data,
                                   mx_float scalar) {
  return Operator("_lesser_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _logical_and_scalar(const std::string& symbol_name,
                                  Symbol data,
                                  mx_float scalar) {
  return Operator("_logical_and_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _logical_or_scalar(const std::string& symbol_name,
                                 Symbol data,
                                 mx_float scalar) {
  return Operator("_logical_or_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _logical_xor_scalar(const std::string& symbol_name,
                                  Symbol data,
                                  mx_float scalar) {
  return Operator("_logical_xor_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the element-wise sine of the input array.
 *
 *        The input should be in radians (:math:`2\pi` rad equals 360 degrees).
 *
 *        .. math::
 *        sin([0, \pi/4, \pi/2]) = [0, 0.707, 1]
 *
 *        The storage type of ``sin`` output depends upon the input storage type:
 *
 *        - sin(default) = default
 *        - sin(row_sparse) = row_sparse
 *        - sin(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L46
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sin(const std::string& symbol_name,
                  Symbol data) {
  return Operator("sin")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the element-wise cosine of the input array.
 *
 *        The input should be in radians (:math:`2\pi` rad equals 360 degrees).
 *
 *        .. math::
 *        cos([0, \pi/4, \pi/2]) = [1, 0.707, 0]
 *
 *        The storage type of ``cos`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L89
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol cos(const std::string& symbol_name,
                  Symbol data) {
  return Operator("cos")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the element-wise tangent of the input array.
 *
 *        The input should be in radians (:math:`2\pi` rad equals 360 degrees).
 *
 *        .. math::
 *        tan([0, \pi/4, \pi/2]) = [0, 1, -inf]
 *
 *        The storage type of ``tan`` output depends upon the input storage type:
 *
 *        - tan(default) = default
 *        - tan(row_sparse) = row_sparse
 *        - tan(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L139
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol tan(const std::string& symbol_name,
                  Symbol data) {
  return Operator("tan")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise inverse sine of the input array.
 *
 *        The input should be in the range `[-1, 1]`.
 *        The output is in the closed interval of [:math:`-\pi/2`, :math:`\pi/2`].
 *
 *        .. math::
 *        arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2]
 *
 *        The storage type of ``arcsin`` output depends upon the input storage type:
 *
 *        - arcsin(default) = default
 *        - arcsin(row_sparse) = row_sparse
 *        - arcsin(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L160
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arcsin(const std::string& symbol_name,
                     Symbol data) {
  return Operator("arcsin")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise inverse cosine of the input array.
 *
 *        The input should be in range `[-1, 1]`.
 *        The output is in the closed interval :math:`[0, \pi]`
 *
 *        .. math::
 *        arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0]
 *
 *        The storage type of ``arccos`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L179
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arccos(const std::string& symbol_name,
                     Symbol data) {
  return Operator("arccos")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns element-wise inverse tangent of the input array.
 *
 *        The output is in the closed interval :math:`[-\pi/2, \pi/2]`
 *
 *        .. math::
 *        arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4]
 *
 *        The storage type of ``arctan`` output depends upon the input storage type:
 *
 *        - arctan(default) = default
 *        - arctan(row_sparse) = row_sparse
 *        - arctan(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L200
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arctan(const std::string& symbol_name,
                     Symbol data) {
  return Operator("arctan")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Converts each element of the input array from radians to degrees.
 *
 *        .. math::
 *        degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360]
 *
 *        The storage type of ``degrees`` output depends upon the input storage type:
 *
 *        - degrees(default) = default
 *        - degrees(row_sparse) = row_sparse
 *        - degrees(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L219
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol degrees(const std::string& symbol_name,
                      Symbol data) {
  return Operator("degrees")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Converts each element of the input array from degrees to radians.
 *
 *        .. math::
 *        radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi]
 *
 *        The storage type of ``radians`` output depends upon the input storage type:
 *
 *        - radians(default) = default
 *        - radians(row_sparse) = row_sparse
 *        - radians(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L238
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol radians(const std::string& symbol_name,
                      Symbol data) {
  return Operator("radians")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the hyperbolic sine of the input array, computed element-wise.
 *
 *        .. math::
 *        sinh(x) = 0.5\times(exp(x) - exp(-x))
 *
 *        The storage type of ``sinh`` output depends upon the input storage type:
 *
 *        - sinh(default) = default
 *        - sinh(row_sparse) = row_sparse
 *        - sinh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L257
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sinh(const std::string& symbol_name,
                   Symbol data) {
  return Operator("sinh")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the hyperbolic cosine  of the input array, computed element-wise.
 *
 *        .. math::
 *        cosh(x) = 0.5\times(exp(x) + exp(-x))
 *
 *        The storage type of ``cosh`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L272
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol cosh(const std::string& symbol_name,
                   Symbol data) {
  return Operator("cosh")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the hyperbolic tangent of the input array, computed element-wise.
 *
 *        .. math::
 *        tanh(x) = sinh(x) / cosh(x)
 *
 *        The storage type of ``tanh`` output depends upon the input storage type:
 *
 *        - tanh(default) = default
 *        - tanh(row_sparse) = row_sparse
 *        - tanh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L290
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol tanh(const std::string& symbol_name,
                   Symbol data) {
  return Operator("tanh")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the element-wise inverse hyperbolic sine of the input array, \
 *        computed element-wise.
 *
 *        The storage type of ``arcsinh`` output depends upon the input storage type:
 *
 *        - arcsinh(default) = default
 *        - arcsinh(row_sparse) = row_sparse
 *        - arcsinh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L306
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arcsinh(const std::string& symbol_name,
                      Symbol data) {
  return Operator("arcsinh")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the element-wise inverse hyperbolic cosine of the input array, \
 *        computed element-wise.
 *
 *        The storage type of ``arccosh`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L320
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arccosh(const std::string& symbol_name,
                      Symbol data) {
  return Operator("arccosh")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns the element-wise inverse hyperbolic tangent of the input array, \
 *        computed element-wise.
 *
 *        The storage type of ``arctanh`` output depends upon the input storage type:
 *
 *        - arctanh(default) = default
 *        - arctanh(row_sparse) = row_sparse
 *        - arctanh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L337
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arctanh(const std::string& symbol_name,
                      Symbol data) {
  return Operator("arctanh")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred.
 */
enum class _sample_multinomialDtype {
  kFloat16 = 0,
  kFloat32 = 1,
  kFloat64 = 2,
  kInt32 = 3,
  kUint8 = 4
};

/*!
 * \brief Concurrent sampling from multiple multinomial distributions.
 *
 *        *data* is an *n* dimensional array whose last dimension has length *k*, where
 *        *k* is the number of possible outcomes of each multinomial distribution. This
 *        operator will draw *shape* samples from each distribution. If shape is empty
 *        one sample will be drawn from each distribution.
 *
 *        If *get_prob* is true, a second array containing log likelihood of the drawn
 *        samples will also be returned. This is usually used for reinforcement learning
 *        where you can provide reward as head gradient for this array to estimate
 *        gradient.
 *
 *        Note that the input distribution must be normalized, i.e. *data* must sum to
 *        1 along its last axis.
 *
 *        Examples::
 *
 *        probs = [[0, 0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1, 0]]
 *
 *        // Draw a single sample for each distribution
 *        sample_multinomial(probs) = [3, 0]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_multinomial(probs, shape=(2)) = [[4, 2],
 *        [0, 0]]
 *
 *        // requests log likelihood
 *        sample_multinomial(probs, get_prob=True) = [2, 1], [0.2, 0.3]
 *
 * \param symbol_name name of the resulting symbol
 * \param data Distribution probabilities. Must sum to one on the last axis.
 * \param shape Shape to be sampled from each random distribution.
 * \param get_prob Whether to also return the log probability of sampled result. This is
 *        usually used for differentiating through stochastic variables, e.g. in
 * \param dtype DType of the output in case this can't be inferred.
 * \return new symbol
 */
inline Symbol _sample_multinomial(const std::string& symbol_name,
                                  Symbol data,
                                  Shape shape = {},
                                  bool get_prob = false,
                                  _sample_multinomialDtype dtype = _sample_multinomialDtype::kInt32) {
  static const char *_sample_multinomialDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "uint8"
  };
  return Operator("_sample_multinomial")
           .SetParam("shape", shape)
           .SetParam("get_prob", get_prob)
           .SetParam("dtype", _sample_multinomialDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _sample_uniformDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Concurrent sampling from multiple
 *        uniform distributions on the intervals given by *[low,high)*.
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Examples::
 *
 *        low = [ 0.0, 2.5 ]
 *        high = [ 1.0, 3.7 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_uniform(low, high) = [ 0.40451524,  3.18687344]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_uniform(low, high, shape=(2)) = [[ 0.40451524,  0.18017688],
 *        [ 3.18687344,  3.68352246]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L276
 * \param symbol_name name of the resulting symbol
 * \param low Lower bounds of the distributions.
 * \param high Upper bounds of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_uniform(const std::string& symbol_name,
                              Symbol low,
                              Symbol high,
                              Shape shape = Shape(),
                              _sample_uniformDtype dtype = _sample_uniformDtype::kNone) {
  static const char *_sample_uniformDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_uniform")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_uniformDtypeValues[int(dtype)])
           .SetInput("low", low)
           .SetInput("high", high)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _sample_normalDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Concurrent sampling from multiple
 *        normal distributions with parameters *mu* (mean) and *sigma* (standard
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Examples::
 *
 *        mu = [ 0.0, 2.5 ]
 *        sigma = [ 1.0, 3.7 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_normal(mu, sigma) = [-0.56410581,  0.95934606]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_normal(mu, sigma, shape=(2)) = [[-0.56410581,  0.2928229 ],
 *        [ 0.95934606,  4.48287058]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L278
 * \param symbol_name name of the resulting symbol
 * \param mu Means of the distributions.
 * \param sigma Standard deviations of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_normal(const std::string& symbol_name,
                             Symbol mu,
                             Symbol sigma,
                             Shape shape = Shape(),
                             _sample_normalDtype dtype = _sample_normalDtype::kNone) {
  static const char *_sample_normalDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_normal")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_normalDtypeValues[int(dtype)])
           .SetInput("mu", mu)
           .SetInput("sigma", sigma)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _sample_gammaDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Concurrent sampling from multiple
 *        gamma distributions with parameters *alpha* (shape) and *beta* (scale).
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Examples::
 *
 *        alpha = [ 0.0, 2.5 ]
 *        beta = [ 1.0, 0.7 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_gamma(alpha, beta) = [ 0.        ,  2.25797319]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_gamma(alpha, beta, shape=(2)) = [[ 0.        ,  0.        ],
 *        [ 2.25797319,  1.70734084]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L280
 * \param symbol_name name of the resulting symbol
 * \param alpha Alpha (shape) parameters of the distributions.
 * \param beta Beta (scale) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_gamma(const std::string& symbol_name,
                            Symbol alpha,
                            Symbol beta,
                            Shape shape = Shape(),
                            _sample_gammaDtype dtype = _sample_gammaDtype::kNone) {
  static const char *_sample_gammaDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_gamma")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_gammaDtypeValues[int(dtype)])
           .SetInput("alpha", alpha)
           .SetInput("beta", beta)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _sample_exponentialDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Concurrent sampling from multiple
 *        exponential distributions with parameters lambda (rate).
 *
 *        The parameters of the distributions are provided as an input array.
 *        Let *[s]* be the shape of the input array, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input array,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input value at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input array.
 *
 *        Examples::
 *
 *        lam = [ 1.0, 8.5 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_exponential(lam) = [ 0.51837951,  0.09994757]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_exponential(lam, shape=(2)) = [[ 0.51837951,  0.19866663],
 *        [ 0.09994757,  0.50447971]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L283
 * \param symbol_name name of the resulting symbol
 * \param lam Lambda (rate) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_exponential(const std::string& symbol_name,
                                  Symbol lam,
                                  Shape shape = Shape(),
                                  _sample_exponentialDtype dtype = _sample_exponentialDtype::kNone) {
  static const char *_sample_exponentialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_exponential")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_exponentialDtypeValues[int(dtype)])
           .SetInput("lam", lam)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _sample_poissonDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Concurrent sampling from multiple
 *        Poisson distributions with parameters lambda (rate).
 *
 *        The parameters of the distributions are provided as an input array.
 *        Let *[s]* be the shape of the input array, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input array,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input value at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input array.
 *
 *        Samples will always be returned as a floating point data type.
 *
 *        Examples::
 *
 *        lam = [ 1.0, 8.5 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_poisson(lam) = [  0.,  13.]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_poisson(lam, shape=(2)) = [[  0.,   4.],
 *        [ 13.,   8.]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L285
 * \param symbol_name name of the resulting symbol
 * \param lam Lambda (rate) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_poisson(const std::string& symbol_name,
                              Symbol lam,
                              Shape shape = Shape(),
                              _sample_poissonDtype dtype = _sample_poissonDtype::kNone) {
  static const char *_sample_poissonDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_poisson")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_poissonDtypeValues[int(dtype)])
           .SetInput("lam", lam)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _sample_negative_binomialDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Concurrent sampling from multiple
 *        negative binomial distributions with parameters *k* (failure limit) and *p*
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Samples will always be returned as a floating point data type.
 *
 *        Examples::
 *
 *        k = [ 20, 49 ]
 *        p = [ 0.4 , 0.77 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_negative_binomial(k, p) = [ 15.,  16.]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_negative_binomial(k, p, shape=(2)) = [[ 15.,  50.],
 *        [ 16.,  12.]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L287
 * \param symbol_name name of the resulting symbol
 * \param k Limits of unsuccessful experiments.
 * \param p Failure probabilities in each experiment.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_negative_binomial(const std::string& symbol_name,
                                        Symbol k,
                                        Symbol p,
                                        Shape shape = Shape(),
                                        _sample_negative_binomialDtype dtype = _sample_negative_binomialDtype::kNone) {
  static const char *_sample_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_negative_binomial")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_negative_binomialDtypeValues[int(dtype)])
           .SetInput("k", k)
           .SetInput("p", p)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _sample_generalized_negative_binomialDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Concurrent sampling from multiple
 *        generalized negative binomial distributions with parameters *mu* (mean) and
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Samples will always be returned as a floating point data type.
 *
 *        Examples::
 *
 *        mu = [ 2.0, 2.5 ]
 *        alpha = [ 1.0, 0.1 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_generalized_negative_binomial(mu, alpha) = [ 0.,  3.]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_generalized_negative_binomial(mu, alpha, shape=(2)) = [[ 0.,  3.],
 *        [ 3.,  1.]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L290
 * \param symbol_name name of the resulting symbol
 * \param mu Means of the distributions.
 * \param alpha Alpha (dispersion) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_generalized_negative_binomial(const std::string& symbol_name,
                                                    Symbol mu,
                                                    Symbol alpha,
                                                    Shape shape = Shape(),
                                                    _sample_generalized_negative_binomialDtype dtype = _sample_generalized_negative_binomialDtype::kNone) {
  static const char *_sample_generalized_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_generalized_negative_binomial")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_generalized_negative_binomialDtypeValues[int(dtype)])
           .SetInput("mu", mu)
           .SetInput("alpha", alpha)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from an an approximately log-uniform
 *        or Zipfian distribution without replacement.
 *
 *        This operation takes a 2-D shape `(batch_size, num_sampled)`,
 *        and randomly generates *num_sampled* samples from the range of integers [0,
 *        for each instance in the batch.
 *
 *        The elements in each instance are drawn without replacement from the base
 *        The base distribution for this operator is an approximately log-uniform or
 *
 *        P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
 *
 *        Additionaly, it also returns the number of trials used to obtain `num_sampled`
 *        each instance in the batch.
 *
 *        Example::
 *
 *        samples, trials = _sample_unique_zipfian(750000, shape=(4, 8192))
 *        unique(samples[0]) = 8192
 *        unique(samples[3]) = 8192
 *        trials[0] = 16435
 *
 *
 *
 *        Defined in src/operator/random/unique_sample_op.cc:L66
 * \param symbol_name name of the resulting symbol
 * \param range_max The number of possible classes.
 * \param shape 2-D shape of the output, where shape[0] is the batch size, and shape[1]
 * \return new symbol
 */
inline Symbol _sample_unique_zipfian(const std::string& symbol_name,
                                     int range_max,
                                     Shape shape = Shape()) {
  return Operator("_sample_unique_zipfian")
           .SetParam("range_max", range_max)
           .SetParam("shape", shape)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _random_uniformDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Draw random samples from a uniform distribution.
 *
 *        .. note:: The existing alias ``uniform`` is deprecated.
 *
 *        Samples are uniformly distributed over the half-open interval *[low, high)*
 *        (includes *low*, but excludes *high*).
 *
 *        Example::
 *
 *        uniform(low=0, high=1, shape=(2,2)) = [[ 0.60276335,  0.85794562],
 *        [ 0.54488319,  0.84725171]]
 *
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L96
 * \param symbol_name name of the resulting symbol
 * \param low Lower bound of the distribution.
 * \param high Upper bound of the distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_uniform(const std::string& symbol_name,
                              mx_float low = 0,
                              mx_float high = 1,
                              Shape shape = Shape(),
                              const std::string& ctx = "",
                              _random_uniformDtype dtype = _random_uniformDtype::kNone) {
  static const char *_random_uniformDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_uniform")
           .SetParam("low", low)
           .SetParam("high", high)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_uniformDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _random_normalDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Draw random samples from a normal (Gaussian) distribution.
 *
 *        .. note:: The existing alias ``normal`` is deprecated.
 *
 *        Samples are distributed according to a normal distribution parametrized by
 *        (standard deviation).
 *
 *        Example::
 *
 *        normal(loc=0, scale=1, shape=(2,2)) = [[ 1.89171135, -1.16881478],
 *        [-1.23474145,  1.55807114]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L113
 * \param symbol_name name of the resulting symbol
 * \param loc Mean of the distribution.
 * \param scale Standard deviation of the distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_normal(const std::string& symbol_name,
                             mx_float loc = 0,
                             mx_float scale = 1,
                             Shape shape = Shape(),
                             const std::string& ctx = "",
                             _random_normalDtype dtype = _random_normalDtype::kNone) {
  static const char *_random_normalDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_normal")
           .SetParam("loc", loc)
           .SetParam("scale", scale)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_normalDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _random_gammaDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Draw random samples from a gamma distribution.
 *
 *        Samples are distributed according to a gamma distribution parametrized by
 *
 *        Example::
 *
 *        gamma(alpha=9, beta=0.5, shape=(2,2)) = [[ 7.10486984,  3.37695289],
 *        [ 3.91697288,  3.65933681]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L125
 * \param symbol_name name of the resulting symbol
 * \param alpha Alpha parameter (shape) of the gamma distribution.
 * \param beta Beta parameter (scale) of the gamma distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_gamma(const std::string& symbol_name,
                            mx_float alpha = 1,
                            mx_float beta = 1,
                            Shape shape = Shape(),
                            const std::string& ctx = "",
                            _random_gammaDtype dtype = _random_gammaDtype::kNone) {
  static const char *_random_gammaDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_gamma")
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_gammaDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _random_exponentialDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Draw random samples from an exponential distribution.
 *
 *        Samples are distributed according to an exponential distribution parametrized
 *
 *        Example::
 *
 *        exponential(lam=4, shape=(2,2)) = [[ 0.0097189 ,  0.08999364],
 *        [ 0.04146638,  0.31715935]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L137
 * \param symbol_name name of the resulting symbol
 * \param lam Lambda parameter (rate) of the exponential distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_exponential(const std::string& symbol_name,
                                  mx_float lam = 1,
                                  Shape shape = Shape(),
                                  const std::string& ctx = "",
                                  _random_exponentialDtype dtype = _random_exponentialDtype::kNone) {
  static const char *_random_exponentialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_exponential")
           .SetParam("lam", lam)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_exponentialDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _random_poissonDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Draw random samples from a Poisson distribution.
 *
 *        Samples are distributed according to a Poisson distribution parametrized by
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        poisson(lam=4, shape=(2,2)) = [[ 5.,  2.],
 *        [ 4.,  6.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L150
 * \param symbol_name name of the resulting symbol
 * \param lam Lambda parameter (rate) of the Poisson distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_poisson(const std::string& symbol_name,
                              mx_float lam = 1,
                              Shape shape = Shape(),
                              const std::string& ctx = "",
                              _random_poissonDtype dtype = _random_poissonDtype::kNone) {
  static const char *_random_poissonDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_poisson")
           .SetParam("lam", lam)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_poissonDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _random_negative_binomialDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Draw random samples from a negative binomial distribution.
 *
 *        Samples are distributed according to a negative binomial distribution
 *        *k* (limit of unsuccessful experiments) and *p* (failure probability in each
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        negative_binomial(k=3, p=0.4, shape=(2,2)) = [[ 4.,  7.],
 *        [ 2.,  5.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L164
 * \param symbol_name name of the resulting symbol
 * \param k Limit of unsuccessful experiments.
 * \param p Failure probability in each experiment.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_negative_binomial(const std::string& symbol_name,
                                        int k = 1,
                                        mx_float p = 1,
                                        Shape shape = Shape(),
                                        const std::string& ctx = "",
                                        _random_negative_binomialDtype dtype = _random_negative_binomialDtype::kNone) {
  static const char *_random_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_negative_binomial")
           .SetParam("k", k)
           .SetParam("p", p)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_negative_binomialDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to float32 if not
 */
enum class _random_generalized_negative_binomialDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Draw random samples from a generalized negative binomial distribution.
 *
 *        Samples are distributed according to a generalized negative binomial
 *        *mu* (mean) and *alpha* (dispersion). *alpha* is defined as *1/k* where *k* is
 *        number of unsuccessful experiments (generalized to real numbers).
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        generalized_negative_binomial(mu=2.0, alpha=0.3, shape=(2,2)) = [[ 2.,  1.],
 *        [ 6.,  4.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L179
 * \param symbol_name name of the resulting symbol
 * \param mu Mean of the negative binomial distribution.
 * \param alpha Alpha (dispersion) parameter of the negative binomial distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_generalized_negative_binomial(const std::string& symbol_name,
                                                    mx_float mu = 1,
                                                    mx_float alpha = 1,
                                                    Shape shape = Shape(),
                                                    const std::string& ctx = "",
                                                    _random_generalized_negative_binomialDtype dtype = _random_generalized_negative_binomialDtype::kNone) {
  static const char *_random_generalized_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_generalized_negative_binomial")
           .SetParam("mu", mu)
           .SetParam("alpha", alpha)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_generalized_negative_binomialDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to int32 if not
 */
enum class _random_randintDtype {
  kNone = 0,
  kInt32 = 1,
  kInt64 = 2
};

/*!
 * \brief Draw random samples from a discrete uniform distribution.
 *
 *        Samples are uniformly distributed over the half-open interval *[low, high)*
 *        (includes *low*, but excludes *high*).
 *
 *        Example::
 *
 *        randint(low=0, high=5, shape=(2,2)) = [[ 0,  2],
 *        [ 3,  1]]
 *
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L193
 * \param symbol_name name of the resulting symbol
 * \param low Lower bound of the distribution.
 * \param high Upper bound of the distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to int32 if
 * \return new symbol
 */
inline Symbol _random_randint(const std::string& symbol_name,
                              int64_t low,
                              int64_t high,
                              Shape shape = Shape(),
                              const std::string& ctx = "",
                              _random_randintDtype dtype = _random_randintDtype::kNone) {
  static const char *_random_randintDtypeValues[] = {
    "None",
    "int32",
    "int64"
  };
  return Operator("_random_randint")
           .SetParam("low", low)
           .SetParam("high", high)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_randintDtypeValues[int(dtype)])
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from a uniform distribution according to the input array
 *
 *        Samples are uniformly distributed over the half-open interval *[low, high)*
 *        (includes *low*, but excludes *high*).
 *
 *        Example::
 *
 *        uniform(low=0, high=1, data=ones(2,2)) = [[ 0.60276335,  0.85794562],
 *        [ 0.54488319,  0.84725171]]
 *
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L208
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param low Lower bound of the distribution.
 * \param high Upper bound of the distribution.
 * \return new symbol
 */
inline Symbol _random_uniform_like(const std::string& symbol_name,
                                   Symbol data,
                                   mx_float low = 0,
                                   mx_float high = 1) {
  return Operator("_random_uniform_like")
           .SetParam("low", low)
           .SetParam("high", high)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from a normal (Gaussian) distribution according to the
 *
 *        Samples are distributed according to a normal distribution parametrized by
 *        (standard deviation).
 *
 *        Example::
 *
 *        normal(loc=0, scale=1, data=ones(2,2)) = [[ 1.89171135, -1.16881478],
 *        [-1.23474145,  1.55807114]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L220
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param loc Mean of the distribution.
 * \param scale Standard deviation of the distribution.
 * \return new symbol
 */
inline Symbol _random_normal_like(const std::string& symbol_name,
                                  Symbol data,
                                  mx_float loc = 0,
                                  mx_float scale = 1) {
  return Operator("_random_normal_like")
           .SetParam("loc", loc)
           .SetParam("scale", scale)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from a gamma distribution according to the input array
 *
 *        Samples are distributed according to a gamma distribution parametrized by
 *
 *        Example::
 *
 *        gamma(alpha=9, beta=0.5, data=ones(2,2)) = [[ 7.10486984,  3.37695289],
 *        [ 3.91697288,  3.65933681]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L231
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param alpha Alpha parameter (shape) of the gamma distribution.
 * \param beta Beta parameter (scale) of the gamma distribution.
 * \return new symbol
 */
inline Symbol _random_gamma_like(const std::string& symbol_name,
                                 Symbol data,
                                 mx_float alpha = 1,
                                 mx_float beta = 1) {
  return Operator("_random_gamma_like")
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from an exponential distribution according to the input
 *
 *        Samples are distributed according to an exponential distribution parametrized
 *
 *        Example::
 *
 *        exponential(lam=4, data=ones(2,2)) = [[ 0.0097189 ,  0.08999364],
 *        [ 0.04146638,  0.31715935]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L242
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param lam Lambda parameter (rate) of the exponential distribution.
 * \return new symbol
 */
inline Symbol _random_exponential_like(const std::string& symbol_name,
                                       Symbol data,
                                       mx_float lam = 1) {
  return Operator("_random_exponential_like")
           .SetParam("lam", lam)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from a Poisson distribution according to the input array
 *
 *        Samples are distributed according to a Poisson distribution parametrized by
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        poisson(lam=4, data=ones(2,2)) = [[ 5.,  2.],
 *        [ 4.,  6.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L254
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param lam Lambda parameter (rate) of the Poisson distribution.
 * \return new symbol
 */
inline Symbol _random_poisson_like(const std::string& symbol_name,
                                   Symbol data,
                                   mx_float lam = 1) {
  return Operator("_random_poisson_like")
           .SetParam("lam", lam)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from a negative binomial distribution according to the
 *
 *        Samples are distributed according to a negative binomial distribution
 *        *k* (limit of unsuccessful experiments) and *p* (failure probability in each
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        negative_binomial(k=3, p=0.4, data=ones(2,2)) = [[ 4.,  7.],
 *        [ 2.,  5.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L267
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param k Limit of unsuccessful experiments.
 * \param p Failure probability in each experiment.
 * \return new symbol
 */
inline Symbol _random_negative_binomial_like(const std::string& symbol_name,
                                             Symbol data,
                                             int k = 1,
                                             mx_float p = 1) {
  return Operator("_random_negative_binomial_like")
           .SetParam("k", k)
           .SetParam("p", p)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Draw random samples from a generalized negative binomial distribution according
 *        input array shape.
 *
 *        Samples are distributed according to a generalized negative binomial
 *        *mu* (mean) and *alpha* (dispersion). *alpha* is defined as *1/k* where *k* is
 *        number of unsuccessful experiments (generalized to real numbers).
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        generalized_negative_binomial(mu=2.0, alpha=0.3, data=ones(2,2)) = [[ 2.,  1.],
 *        [ 6.,  4.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L283
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param mu Mean of the negative binomial distribution.
 * \param alpha Alpha (dispersion) parameter of the negative binomial distribution.
 * \return new symbol
 */
inline Symbol _random_generalized_negative_binomial_like(const std::string& symbol_name,
                                                         Symbol data,
                                                         mx_float mu = 1,
                                                         mx_float alpha = 1) {
  return Operator("_random_generalized_negative_binomial_like")
           .SetParam("mu", mu)
           .SetParam("alpha", alpha)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Randomly shuffle the elements.
 *
 *        This shuffles the array along the first axis.
 *        The order of the elements in each subarray does not change.
 *        For example, if a 2D array is given, the order of the rows randomly changes,
 *        but the order of the elements in each row does not change.
 *
 * \param symbol_name name of the resulting symbol
 * \param data Data to be shuffled.
 * \return new symbol
 */
inline Symbol _shuffle(const std::string& symbol_name,
                       Symbol data) {
  return Operator("_shuffle")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief elemwise_add operator for input dataA and input dataB data type of int8,
 *        and accumulates in type int32 for the output. For each argument, two more
 *        float32 must be provided representing the thresholds of quantizing argument
 *        type float32 to int8. The final outputs contain result in int32, and min
 *        and max thresholds representing the threholds for quantizing the float32 output
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param lhs first input
 * \param rhs second input
 * \param lhs_min 3rd input
 * \param lhs_max 4th input
 * \param rhs_min 5th input
 * \param rhs_max 6th input
 * \return new symbol
 */
inline Symbol _contrib_quantized_elemwise_add(const std::string& symbol_name,
                                              Symbol lhs,
                                              Symbol rhs,
                                              Symbol lhs_min,
                                              Symbol lhs_max,
                                              Symbol rhs_min,
                                              Symbol rhs_max) {
  return Operator("_contrib_quantized_elemwise_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .SetInput("lhs_min", lhs_min)
           .SetInput("lhs_max", lhs_max)
           .SetInput("rhs_min", rhs_min)
           .SetInput("rhs_max", rhs_max)
           .CreateSymbol(symbol_name);
}

/*! \brief Output data type.
 */
enum class _contrib_dequantizeOutType {
  kFloat32 = 0
};

/*!
 * \brief Dequantize the input tensor into a float tensor.
 *        min_range and max_range are scalar floats that specify the range for
 *        the output data.
 *
 *        When input data type is `uint8`, the output is calculated using the following
 *
 *        `out[i] = in[i] * (max_range - min_range) / 255.0`,
 *
 *        When input data type is `int8`, the output is calculate using the following
 *        by keep zero centered for the quantized value:
 *
 *        `out[i] = in[i] * MaxAbs(min_range, max_range) / 127.0`,
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *
 *        Defined in src/operator/quantization/dequantize.cc:L83
 * \param symbol_name name of the resulting symbol
 * \param data A ndarray/symbol of type `uint8`
 * \param min_range The minimum scalar value possibly produced for the input in float32
 * \param max_range The maximum scalar value possibly produced for the input in float32
 * \param out_type Output data type.
 * \return new symbol
 */
inline Symbol _contrib_dequantize(const std::string& symbol_name,
                                  Symbol data,
                                  Symbol min_range,
                                  Symbol max_range,
                                  _contrib_dequantizeOutType out_type = _contrib_dequantizeOutType::kFloat32) {
  static const char *_contrib_dequantizeOutTypeValues[] = {
    "float32"
  };
  return Operator("_contrib_dequantize")
           .SetParam("out_type", _contrib_dequantizeOutTypeValues[int(out_type)])
           .SetInput("data", data)
           .SetInput("min_range", min_range)
           .SetInput("max_range", max_range)
           .CreateSymbol(symbol_name);
}

/*! \brief Whether to pick convolution algo by running performance test.
 */
enum class _contrib_quantized_convCudnnTune {
  kNone = 0,
  kFastest = 1,
  kLimited_workspace = 2,
  kOff = 3
};

/*! \brief Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.NHWC and NDHWC are
 */
enum class _contrib_quantized_convLayout {
  kNone = 0,
  kNCDHW = 1,
  kNCHW = 2,
  kNCW = 3,
  kNDHWC = 4,
  kNHWC = 5
};

/*!
 * \brief Convolution operator for input, weight and bias data type of int8,
 *        and accumulates in type int32 for the output. For each argument, two more
 *        float32 must be provided representing the thresholds of quantizing argument
 *        type float32 to int8. The final outputs contain the convolution result in
 *        and max thresholds representing the threholds for quantizing the float32 output
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantized_conv.cc:L137
 * \param symbol_name name of the resulting symbol
 * \param data Input data.
 * \param weight weight.
 * \param bias bias.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param min_weight Minimum value of weight.
 * \param max_weight Maximum value of weight.
 * \param min_bias Minimum value of bias.
 * \param max_bias Maximum value of bias.
 * \param kernel Convolution kernel size: (w,), (h, w) or (d, h, w)
 * \param num_filter Convolution filter(channel) number
 * \param stride Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param dilate Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param pad Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
 * \param num_group Number of group partitions.
 * \param workspace Maximum temporary workspace allowed (MB) in convolution.This
 *        parameter has two usages. When CUDNN is not used, it determines the effective
 *        batch size of the convolution kernel. When CUDNN is used, it controls the
 *        maximum temporary storage used for tuning the best CUDNN kernel when
 * \param no_bias Whether to disable bias parameter.
 * \param cudnn_tune Whether to pick convolution algo by running performance test.
 * \param cudnn_off Turn off cudnn for this layer.
 * \param layout Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.NHWC and NDHWC are
 * \return new symbol
 */
inline Symbol _contrib_quantized_conv(const std::string& symbol_name,
                                      Symbol data,
                                      Symbol weight,
                                      Symbol bias,
                                      Symbol min_data,
                                      Symbol max_data,
                                      Symbol min_weight,
                                      Symbol max_weight,
                                      Symbol min_bias,
                                      Symbol max_bias,
                                      Shape kernel,
                                      uint32_t num_filter,
                                      Shape stride = {},
                                      Shape dilate = {},
                                      Shape pad = {},
                                      uint32_t num_group = 1,
                                      uint64_t workspace = 1024,
                                      bool no_bias = false,
                                      _contrib_quantized_convCudnnTune cudnn_tune = _contrib_quantized_convCudnnTune::kNone,
                                      bool cudnn_off = false,
                                      _contrib_quantized_convLayout layout = _contrib_quantized_convLayout::kNone) {
  static const char *_contrib_quantized_convCudnnTuneValues[] = {
    "None",
    "fastest",
    "limited_workspace",
    "off"
  };
  static const char *_contrib_quantized_convLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC"
  };
  return Operator("_contrib_quantized_conv")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("num_group", num_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("cudnn_tune", _contrib_quantized_convCudnnTuneValues[int(cudnn_tune)])
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("layout", _contrib_quantized_convLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .SetInput("min_weight", min_weight)
           .SetInput("max_weight", max_weight)
           .SetInput("min_bias", min_bias)
           .SetInput("max_bias", max_bias)
           .CreateSymbol(symbol_name);
}

/*! \brief Whether to pick convolution algo by running performance test.
 */
enum class ConvolutionCudnnTune {
  kNone = 0,
  kFastest = 1,
  kLimited_workspace = 2,
  kOff = 3
};

/*! \brief Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.NHWC and NDHWC are
 */
enum class ConvolutionLayout {
  kNone = 0,
  kNCDHW = 1,
  kNCHW = 2,
  kNCW = 3,
  kNDHWC = 4,
  kNHWC = 5
};

/*!
 * \brief Compute *N*-D convolution on *(N+2)*-D input.
 *
 *        In the 2-D convolution, given input data with shape *(batch_size,
 *        channel, height, width)*, the output is computed by
 *
 *        .. math::
 *
 *        out[n,i,:,:] = bias[i] + \sum_{j=0}^{channel} data[n,j,:,:] \star
 *        weight[i,j,:,:]
 *
 *        where :math:`\star` is the 2-D cross-correlation operator.
 *
 *        For general 2-D convolution, the shapes are
 *
 *        - **data**: *(batch_size, channel, height, width)*
 *        - **weight**: *(num_filter, channel, kernel[0], kernel[1])*
 *        - **bias**: *(num_filter,)*
 *        - **out**: *(batch_size, num_filter, out_height, out_width)*.
 *
 *        Define::
 *
 *        f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1
 *
 *        then we have::
 *
 *        out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
 *        out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])
 *
 *        If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
 *
 *        The default data ``layout`` is *NCHW*, namely *(batch_size, channel, height,
 *        width)*. We can choose other layouts such as *NWC*.
 *
 *        If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
 *        evenly into *g* parts along the channel axis, and also evenly split ``weight``
 *        along the first dimension. Next compute the convolution on the *i*-th part of
 *        the data with the *i*-th weight part. The output is obtained by concatenating
 *        the *g* results.
 *
 *        1-D convolution does not have *height* dimension but only *width* in space.
 *
 *        - **data**: *(batch_size, channel, width)*
 *        - **weight**: *(num_filter, channel, kernel[0])*
 *        - **bias**: *(num_filter,)*
 *        - **out**: *(batch_size, num_filter, out_width)*.
 *
 *        3-D convolution adds an additional *depth* dimension besides *height* and
 *        *width*. The shapes are
 *
 *        - **data**: *(batch_size, channel, depth, height, width)*
 *        - **weight**: *(num_filter, channel, kernel[0], kernel[1], kernel[2])*
 *        - **bias**: *(num_filter,)*
 *        - **out**: *(batch_size, num_filter, out_depth, out_height, out_width)*.
 *
 *        Both ``weight`` and ``bias`` are learnable parameters.
 *
 *        There are other options to tune the performance.
 *
 *        - **cudnn_tune**: enable this option leads to higher startup time but may give
 *        faster speed. Options are
 *
 *        - **off**: no tuning
 *        - **limited_workspace**:run test and pick the fastest algorithm that doesn't
 *        exceed workspace limit.
 *        - **fastest**: pick the fastest algorithm and ignore workspace limit.
 *        - **None** (default): the behavior is determined by environment variable
 *        ``MXNET_CUDNN_AUTOTUNE_DEFAULT``. 0 for off, 1 for limited workspace
 *        (default), 2 for fastest.
 *
 *        - **workspace**: A large number leads to more (GPU) memory usage but may improve
 *        the performance.
 *
 *
 *
 *        Defined in src/operator/nn/convolution.cc:L472
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the ConvolutionOp.
 * \param weight Weight matrix.
 * \param bias Bias parameter.
 * \param kernel Convolution kernel size: (w,), (h, w) or (d, h, w)
 * \param num_filter Convolution filter(channel) number
 * \param stride Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param dilate Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param pad Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
 * \param num_group Number of group partitions.
 * \param workspace Maximum temporary workspace allowed (MB) in convolution.This
 *        parameter has two usages. When CUDNN is not used, it determines the effective
 *        batch size of the convolution kernel. When CUDNN is used, it controls the
 *        maximum temporary storage used for tuning the best CUDNN kernel when
 * \param no_bias Whether to disable bias parameter.
 * \param cudnn_tune Whether to pick convolution algo by running performance test.
 * \param cudnn_off Turn off cudnn for this layer.
 * \param layout Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.NHWC and NDHWC are
 * \return new symbol
 */
inline Symbol Convolution(const std::string& symbol_name,
                          Symbol data,
                          Symbol weight,
                          Symbol bias,
                          Shape kernel,
                          uint32_t num_filter,
                          Shape stride = {},
                          Shape dilate = {},
                          Shape pad = {},
                          uint32_t num_group = 1,
                          uint64_t workspace = 1024,
                          bool no_bias = false,
                          ConvolutionCudnnTune cudnn_tune = ConvolutionCudnnTune::kNone,
                          bool cudnn_off = false,
                          ConvolutionLayout layout = ConvolutionLayout::kNone) {
  static const char *ConvolutionCudnnTuneValues[] = {
    "None",
    "fastest",
    "limited_workspace",
    "off"
  };
  static const char *ConvolutionLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC"
  };
  return Operator("Convolution")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("num_group", num_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("cudnn_tune", ConvolutionCudnnTuneValues[int(cudnn_tune)])
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("layout", ConvolutionLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data A ndarray/symbol of type `float32`
 * \param min_data The minimum scalar value possibly produced for the data
 * \param max_data The maximum scalar value possibly produced for the data
 * \return new symbol
 */
inline Symbol _contrib_quantized_flatten(const std::string& symbol_name,
                                         Symbol data,
                                         Symbol min_data,
                                         Symbol max_data) {
  return Operator("_contrib_quantized_flatten")
           .SetInput("data", data)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Fully Connected operator for input, weight and bias data type of int8,
 *        and accumulates in type int32 for the output. For each argument, two more
 *        float32 must be provided representing the thresholds of quantizing argument
 *        type float32 to int8. The final outputs contain the convolution result in
 *        and max thresholds representing the threholds for quantizing the float32 output
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantized_fully_connected.cc:L313
 * \param symbol_name name of the resulting symbol
 * \param data Input data.
 * \param weight weight.
 * \param bias bias.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param min_weight Minimum value of weight.
 * \param max_weight Maximum value of weight.
 * \param min_bias Minimum value of bias.
 * \param max_bias Maximum value of bias.
 * \param num_hidden Number of hidden nodes of the output.
 * \param no_bias Whether to disable bias parameter.
 * \param flatten Whether to collapse all but the first axis of the input data tensor.
 * \return new symbol
 */
inline Symbol _contrib_quantized_fully_connected(const std::string& symbol_name,
                                                 Symbol data,
                                                 Symbol weight,
                                                 Symbol bias,
                                                 Symbol min_data,
                                                 Symbol max_data,
                                                 Symbol min_weight,
                                                 Symbol max_weight,
                                                 Symbol min_bias,
                                                 Symbol max_bias,
                                                 int num_hidden,
                                                 bool no_bias = false,
                                                 bool flatten = true) {
  return Operator("_contrib_quantized_fully_connected")
           .SetParam("num_hidden", num_hidden)
           .SetParam("no_bias", no_bias)
           .SetParam("flatten", flatten)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .SetInput("min_weight", min_weight)
           .SetInput("max_weight", max_weight)
           .SetInput("min_bias", min_bias)
           .SetInput("max_bias", max_bias)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Applies a linear transformation: :math:`Y = XW^T + b`.
 *
 *        If ``flatten`` is set to be true, then the shapes are:
 *
 *        - **data**: `(batch_size, x1, x2, ..., xn)`
 *        - **weight**: `(num_hidden, x1 * x2 * ... * xn)`
 *        - **bias**: `(num_hidden,)`
 *        - **out**: `(batch_size, num_hidden)`
 *
 *        If ``flatten`` is set to be false, then the shapes are:
 *
 *        - **data**: `(x1, x2, ..., xn, input_dim)`
 *        - **weight**: `(num_hidden, input_dim)`
 *        - **bias**: `(num_hidden,)`
 *        - **out**: `(x1, x2, ..., xn, num_hidden)`
 *
 *        The learnable parameters include both ``weight`` and ``bias``.
 *
 *        If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
 *
 *        .. Note::
 *
 *        The sparse support for FullyConnected is limited to forward evaluation with
 *        weight and bias, where the length of `weight.indices` and `bias.indices` must
 *        to `num_hidden`. This could be useful for model inference with `row_sparse`
 *        trained with importance sampling or noise contrastive estimation.
 *
 *        To compute linear transformation with 'csr' sparse data, sparse.dot is
 *        of sparse.FullyConnected.
 *
 *
 *
 *        Defined in src/operator/nn/fully_connected.cc:L277
 * \param symbol_name name of the resulting symbol
 * \param data Input data.
 * \param weight Weight matrix.
 * \param bias Bias parameter.
 * \param num_hidden Number of hidden nodes of the output.
 * \param no_bias Whether to disable bias parameter.
 * \param flatten Whether to collapse all but the first axis of the input data tensor.
 * \return new symbol
 */
inline Symbol FullyConnected(const std::string& symbol_name,
                             Symbol data,
                             Symbol weight,
                             Symbol bias,
                             int num_hidden,
                             bool no_bias = false,
                             bool flatten = true) {
  return Operator("FullyConnected")
           .SetParam("num_hidden", num_hidden)
           .SetParam("no_bias", no_bias)
           .SetParam("flatten", flatten)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol(symbol_name);
}

/*! \brief Pooling type to be applied.
 */
enum class _contrib_quantized_poolingPoolType {
  kAvg = 0,
  kLp = 1,
  kMax = 2,
  kSum = 3
};

/*! \brief Pooling convention to be applied.
 */
enum class _contrib_quantized_poolingPoolingConvention {
  kFull = 0,
  kSame = 1,
  kValid = 2
};

/*! \brief Set layout for input and output. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 */
enum class _contrib_quantized_poolingLayout {
  kNone = 0,
  kNCDHW = 1,
  kNCHW = 2,
  kNCW = 3,
  kNDHWC = 4,
  kNHWC = 5,
  kNWC = 6
};

/*!
 * \brief Pooling operator for input and output data type of int8.
 *        The input and output data comes with min and max thresholds for quantizing
 *        the float32 data into int8.
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *        This operator only supports `pool_type` of `avg` or `max`.
 *
 *        Defined in src/operator/quantization/quantized_pooling.cc:L145
 * \param symbol_name name of the resulting symbol
 * \param data Input data.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param kernel Pooling kernel size: (y, x) or (d, y, x)
 * \param pool_type Pooling type to be applied.
 * \param global_pool Ignore kernel size, do global pooling based on current input
 * \param cudnn_off Turn off cudnn pooling and use MXNet pooling operator.
 * \param pooling_convention Pooling convention to be applied.
 * \param stride Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each
 * \param pad Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.
 * \param p_value Value of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
 * \param count_include_pad Only used for AvgPool, specify whether to count padding
 *        elements for averagecalculation. For example, with a 5*5 kernel on a 3*3 corner
 *        of a image,the sum of the 9 valid elements will be divided by 25 if this is set
 * \param layout Set layout for input and output. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 * \return new symbol
 */
inline Symbol _contrib_quantized_pooling(const std::string& symbol_name,
                                         Symbol data,
                                         Symbol min_data,
                                         Symbol max_data,
                                         Shape kernel = {},
                                         _contrib_quantized_poolingPoolType pool_type = _contrib_quantized_poolingPoolType::kMax,
                                         bool global_pool = false,
                                         bool cudnn_off = false,
                                         _contrib_quantized_poolingPoolingConvention pooling_convention = _contrib_quantized_poolingPoolingConvention::kValid,
                                         Shape stride = {},
                                         Shape pad = {},
                                         dmlc::optional<int> p_value = dmlc::optional<int>(),
                                         dmlc::optional<bool> count_include_pad = dmlc::optional<bool>(),
                                         _contrib_quantized_poolingLayout layout = _contrib_quantized_poolingLayout::kNone) {
  static const char *_contrib_quantized_poolingPoolTypeValues[] = {
    "avg",
    "lp",
    "max",
    "sum"
  };
  static const char *_contrib_quantized_poolingPoolingConventionValues[] = {
    "full",
    "same",
    "valid"
  };
  static const char *_contrib_quantized_poolingLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC",
    "NWC"
  };
  return Operator("_contrib_quantized_pooling")
           .SetParam("kernel", kernel)
           .SetParam("pool_type", _contrib_quantized_poolingPoolTypeValues[int(pool_type)])
           .SetParam("global_pool", global_pool)
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("pooling_convention", _contrib_quantized_poolingPoolingConventionValues[int(pooling_convention)])
           .SetParam("stride", stride)
           .SetParam("pad", pad)
           .SetParam("p_value", p_value)
           .SetParam("count_include_pad", count_include_pad)
           .SetParam("layout", _contrib_quantized_poolingLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .CreateSymbol(symbol_name);
}

/*! \brief Pooling type to be applied.
 */
enum class PoolingPoolType {
  kAvg = 0,
  kLp = 1,
  kMax = 2,
  kSum = 3
};

/*! \brief Pooling convention to be applied.
 */
enum class PoolingPoolingConvention {
  kFull = 0,
  kSame = 1,
  kValid = 2
};

/*! \brief Set layout for input and output. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 */
enum class PoolingLayout {
  kNone = 0,
  kNCDHW = 1,
  kNCHW = 2,
  kNCW = 3,
  kNDHWC = 4,
  kNHWC = 5,
  kNWC = 6
};

/*!
 * \brief Performs pooling on the input.
 *
 *        The shapes for 1-D pooling are
 *
 *        - **data** and **out**: *(batch_size, channel, width)* (NCW layout) or
 *        *(batch_size, width, channel)* (NWC layout),
 *
 *        The shapes for 2-D pooling are
 *
 *        - **data** and **out**: *(batch_size, channel, height, width)* (NCHW layout) or
 *        *(batch_size, height, width, channel)* (NHWC layout),
 *
 *        out_height = f(height, kernel[0], pad[0], stride[0])
 *        out_width = f(width, kernel[1], pad[1], stride[1])
 *
 *        The definition of *f* depends on ``pooling_convention``, which has two options:
 *
 *        - **valid** (default)::
 *
 *        f(x, k, p, s) = floor((x+2*p-k)/s)+1
 *
 *        - **full**, which is compatible with Caffe::
 *
 *        f(x, k, p, s) = ceil((x+2*p-k)/s)+1
 *
 *        When ``global_pool`` is set to be true, then global pooling is performed. It
 *        ``kernel=(height, width)`` and set the appropiate padding to 0.
 *
 *        Three pooling options are supported by ``pool_type``:
 *
 *        - **avg**: average pooling
 *        - **max**: max pooling
 *        - **sum**: sum pooling
 *        - **lp**: Lp pooling
 *
 *        For 3-D pooling, an additional *depth* dimension is added before
 *        *height*. Namely the input data and output will have shape *(batch_size,
 *        height, width)* (NCDHW layout) or *(batch_size, depth, height, width, channel)*
 *
 *        Notes on Lp pooling:
 *
 *        Lp pooling was first introduced by this paper:
 *        L-1 pooling is simply sum pooling, while L-inf pooling is simply max pooling.
 *        We can see that Lp pooling stands between those two, in practice the most
 *
 *        For each window ``X``, the mathematical expression for Lp pooling is:
 *
 *        :math:`f(X) = \sqrt[p]{\sum_{x}^{X} x^p}`
 *
 *
 *
 *        Defined in src/operator/nn/pooling.cc:L416
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the pooling operator.
 * \param kernel Pooling kernel size: (y, x) or (d, y, x)
 * \param pool_type Pooling type to be applied.
 * \param global_pool Ignore kernel size, do global pooling based on current input
 * \param cudnn_off Turn off cudnn pooling and use MXNet pooling operator.
 * \param pooling_convention Pooling convention to be applied.
 * \param stride Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each
 * \param pad Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.
 * \param p_value Value of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
 * \param count_include_pad Only used for AvgPool, specify whether to count padding
 *        elements for averagecalculation. For example, with a 5*5 kernel on a 3*3 corner
 *        of a image,the sum of the 9 valid elements will be divided by 25 if this is set
 * \param layout Set layout for input and output. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 * \return new symbol
 */
inline Symbol Pooling(const std::string& symbol_name,
                      Symbol data,
                      Shape kernel = {},
                      PoolingPoolType pool_type = PoolingPoolType::kMax,
                      bool global_pool = false,
                      bool cudnn_off = false,
                      PoolingPoolingConvention pooling_convention = PoolingPoolingConvention::kValid,
                      Shape stride = {},
                      Shape pad = {},
                      dmlc::optional<int> p_value = dmlc::optional<int>(),
                      dmlc::optional<bool> count_include_pad = dmlc::optional<bool>(),
                      PoolingLayout layout = PoolingLayout::kNone) {
  static const char *PoolingPoolTypeValues[] = {
    "avg",
    "lp",
    "max",
    "sum"
  };
  static const char *PoolingPoolingConventionValues[] = {
    "full",
    "same",
    "valid"
  };
  static const char *PoolingLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC",
    "NWC"
  };
  return Operator("Pooling")
           .SetParam("kernel", kernel)
           .SetParam("pool_type", PoolingPoolTypeValues[int(pool_type)])
           .SetParam("global_pool", global_pool)
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("pooling_convention", PoolingPoolingConventionValues[int(pooling_convention)])
           .SetParam("stride", stride)
           .SetParam("pad", pad)
           .SetParam("p_value", p_value)
           .SetParam("count_include_pad", count_include_pad)
           .SetParam("layout", PoolingLayoutValues[int(layout)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Output data type. `auto` can be specified to automatically determine output
 */
enum class _contrib_quantize_v2OutType {
  kAuto = 0,
  kInt8 = 1,
  kUint8 = 2
};

/*!
 * \brief Quantize a input tensor from float to `out_type`,
 *        with user-specified `min_calib_range` and `max_calib_range` or the input range
 *
 *        Output `min_range` and `max_range` are scalar floats that specify the range for
 *
 *        When out_type is `uint8`, the output is calculated using the following equation:
 *
 *        `out[i] = (in[i] - min_range) * range(OUTPUT_TYPE) / (max_range - min_range) +
 *
 *        where `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`.
 *
 *        When out_type is `int8`, the output is calculate using the following equation
 *        by keep zero centered for the quantized value:
 *
 *        `out[i] = sign(in[i]) * min(abs(in[i] * scale + 0.5f, quantized_range)`,
 *
 *        where
 *        `quantized_range = MinAbs(max(int8), min(int8))` and
 *        `scale = quantized_range / MaxAbs(min_range, max_range).`
 *
 *        When out_type is `auto`, the output type is automatically determined by
 *        If min_calib_range < 0.0f, the output type will be int8, otherwise will be
 *        If min_calib_range isn't presented, the output type will be int8.
 *
 *        .. Note::
 *        This operator only supports forward propagation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantize_v2.cc:L92
 * \param symbol_name name of the resulting symbol
 * \param data A ndarray/symbol of type `float32`
 * \param out_type Output data type. `auto` can be specified to automatically determine
 * \param min_calib_range The minimum scalar value in the form of float32. If present, it
 * \param max_calib_range The maximum scalar value in the form of float32. If present, it
 * \return new symbol
 */
inline Symbol _contrib_quantize_v2(const std::string& symbol_name,
                                   Symbol data,
                                   _contrib_quantize_v2OutType out_type = _contrib_quantize_v2OutType::kInt8,
                                   mx_float min_calib_range = mx_float(),
                                   mx_float max_calib_range = mx_float()) {
  static const char *_contrib_quantize_v2OutTypeValues[] = {
    "auto",
    "int8",
    "uint8"
  };
  return Operator("_contrib_quantize_v2")
           .SetParam("out_type", _contrib_quantize_v2OutTypeValues[int(out_type)])
           .SetParam("min_calib_range", min_calib_range)
           .SetParam("max_calib_range", max_calib_range)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Joins input arrays along a given axis.
 *
 *        The dimensions of the input arrays should be the same except the axis along
 *        which they will be concatenated.
 *        The dimension of the output array along the concatenated axis will be equal
 *        to the sum of the corresponding dimensions of the input arrays.
 *        All inputs with different min/max will be rescaled by using largest [min, max]
 *        If any input holds int8, then the output will be int8. Otherwise output will be
 *
 *
 *
 *        Defined in src/operator/quantization/quantized_concat.cc:L108
 * \param symbol_name name of the resulting symbol
 * \param data List of arrays to concatenate
 * \param num_args Number of inputs to be concated.
 * \param dim the dimension to be concated.
 * \return new symbol
 */
inline Symbol _contrib_quantized_concat(const std::string& symbol_name,
                                        const std::vector<Symbol>& data,
                                        int num_args,
                                        int dim = 1) {
  return Operator("_contrib_quantized_concat")
           .SetParam("num_args", num_args)
           .SetParam("dim", dim)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Joins input arrays along a given axis.
 *
 *        .. note:: `Concat` is deprecated. Use `concat` instead.
 *
 *        The dimensions of the input arrays should be the same except the axis along
 *        which they will be concatenated.
 *        The dimension of the output array along the concatenated axis will be equal
 *        to the sum of the corresponding dimensions of the input arrays.
 *
 *        The storage type of ``concat`` output depends on storage types of inputs
 *
 *        - concat(csr, csr, ..., csr, dim=0) = csr
 *        - otherwise, ``concat`` generates output with default storage
 *
 *        Example::
 *
 *        x = [[1,1],[2,2]]
 *        y = [[3,3],[4,4],[5,5]]
 *        z = [[6,6], [7,7],[8,8]]
 *
 *        concat(x,y,z,dim=0) = [[ 1.,  1.],
 *        [ 2.,  2.],
 *        [ 3.,  3.],
 *        [ 4.,  4.],
 *        [ 5.,  5.],
 *        [ 6.,  6.],
 *        [ 7.,  7.],
 *        [ 8.,  8.]]
 *
 *        Note that you cannot concat x,y,z along dimension 1 since dimension
 *        0 is not the same for all the input arrays.
 *
 *        concat(y,z,dim=1) = [[ 3.,  3.,  6.,  6.],
 *        [ 4.,  4.,  7.,  7.],
 *        [ 5.,  5.,  8.,  8.]]
 *
 *
 *
 *        Defined in src/operator/nn/concat.cc:L371
 * \param symbol_name name of the resulting symbol
 * \param data List of arrays to concatenate
 * \param num_args Number of inputs to be concated.
 * \param dim the dimension to be concated.
 * \return new symbol
 */
inline Symbol Concat(const std::string& symbol_name,
                     const std::vector<Symbol>& data,
                     int num_args,
                     int dim = 1) {
  return Operator("Concat")
           .SetParam("num_args", num_args)
           .SetParam("dim", dim)
(data)
           .CreateSymbol(symbol_name);
}

/*! \brief Output data type. `auto` can be specified to automatically determine output
 */
enum class _contrib_requantizeOutType {
  kAuto = 0,
  kInt8 = 1,
  kUint8 = 2
};

/*!
 * \brief Given data that is quantized in int32 and the corresponding thresholds,
 *        requantize the data into int8 using min and max thresholds either calculated at
 *        or from calibration. It's highly recommended to pre-calucate the min and max
 *        through calibration since it is able to save the runtime of the operator and
 *        inference accuracy.
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/requantize.cc:L60
 * \param symbol_name name of the resulting symbol
 * \param data A ndarray/symbol of type `int32`
 * \param min_range The original minimum scalar value in the form of float32 used for
 * \param max_range The original maximum scalar value in the form of float32 used for
 * \param out_type Output data type. `auto` can be specified to automatically determine
 * \param min_calib_range The minimum scalar value in the form of float32 obtained
 *        through calibration. If present, it will be used to requantize the int32 data
 * \param max_calib_range The maximum scalar value in the form of float32 obtained
 *        through calibration. If present, it will be used to requantize the int32 data
 * \return new symbol
 */
inline Symbol _contrib_requantize(const std::string& symbol_name,
                                  Symbol data,
                                  Symbol min_range,
                                  Symbol max_range,
                                  _contrib_requantizeOutType out_type = _contrib_requantizeOutType::kInt8,
                                  mx_float min_calib_range = mx_float(),
                                  mx_float max_calib_range = mx_float()) {
  static const char *_contrib_requantizeOutTypeValues[] = {
    "auto",
    "int8",
    "uint8"
  };
  return Operator("_contrib_requantize")
           .SetParam("out_type", _contrib_requantizeOutTypeValues[int(out_type)])
           .SetParam("min_calib_range", min_calib_range)
           .SetParam("max_calib_range", max_calib_range)
           .SetInput("data", data)
           .SetInput("min_range", min_range)
           .SetInput("max_range", max_range)
           .CreateSymbol(symbol_name);
}

/*! \brief Activation function to be applied.
 */
enum class _contrib_quantized_actActType {
  kRelu = 0,
  kSigmoid = 1,
  kSoftrelu = 2,
  kSoftsign = 3,
  kTanh = 4
};

/*!
 * \brief Activation operator for input and output data type of int8.
 *        The input and output data comes with min and max thresholds for quantizing
 *        the float32 data into int8.
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *        This operator only supports `relu`
 *
 *        Defined in src/operator/quantization/quantized_activation.cc:L91
 * \param symbol_name name of the resulting symbol
 * \param data Input data.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param act_type Activation function to be applied.
 * \return new symbol
 */
inline Symbol _contrib_quantized_act(const std::string& symbol_name,
                                     Symbol data,
                                     Symbol min_data,
                                     Symbol max_data,
                                     _contrib_quantized_actActType act_type) {
  static const char *_contrib_quantized_actActTypeValues[] = {
    "relu",
    "sigmoid",
    "softrelu",
    "softsign",
    "tanh"
  };
  return Operator("_contrib_quantized_act")
           .SetParam("act_type", _contrib_quantized_actActTypeValues[int(act_type)])
           .SetInput("data", data)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .CreateSymbol(symbol_name);
}

/*! \brief Activation function to be applied.
 */
enum class ActivationActType {
  kRelu = 0,
  kSigmoid = 1,
  kSoftrelu = 2,
  kSoftsign = 3,
  kTanh = 4
};

/*!
 * \brief Applies an activation function element-wise to the input.
 *
 *        The following activation functions are supported:
 *
 *        - `relu`: Rectified Linear Unit, :math:`y = max(x, 0)`
 *        - `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
 *        - `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) +
 *        - `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
 *        - `softsign`: :math:`y = \frac{x}{1 + abs(x)}`
 *
 *
 *
 *        Defined in src/operator/nn/activation.cc:L167
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \param act_type Activation function to be applied.
 * \return new symbol
 */
inline Symbol Activation(const std::string& symbol_name,
                         Symbol data,
                         ActivationActType act_type) {
  static const char *ActivationActTypeValues[] = {
    "relu",
    "sigmoid",
    "softrelu",
    "softsign",
    "tanh"
  };
  return Operator("Activation")
           .SetParam("act_type", ActivationActTypeValues[int(act_type)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Output data type.
 */
enum class _contrib_quantizeOutType {
  kInt8 = 0,
  kUint8 = 1
};

/*!
 * \brief Quantize a input tensor from float to `out_type`,
 *        with user-specified `min_range` and `max_range`.
 *
 *        min_range and max_range are scalar floats that specify the range for
 *        the input data.
 *
 *        When out_type is `uint8`, the output is calculated using the following equation:
 *
 *        `out[i] = (in[i] - min_range) * range(OUTPUT_TYPE) / (max_range - min_range) +
 *
 *        where `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`.
 *
 *        When out_type is `int8`, the output is calculate using the following equation
 *        by keep zero centered for the quantized value:
 *
 *        `out[i] = sign(in[i]) * min(abs(in[i] * scale + 0.5f, quantized_range)`,
 *
 *        where
 *        `quantized_range = MinAbs(max(int8), min(int8))` and
 *        `scale = quantized_range / MaxAbs(min_range, max_range).`
 *
 *        .. Note::
 *        This operator only supports forward propagation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantize.cc:L74
 * \param symbol_name name of the resulting symbol
 * \param data A ndarray/symbol of type `float32`
 * \param min_range The minimum scalar value possibly produced for the input
 * \param max_range The maximum scalar value possibly produced for the input
 * \param out_type Output data type.
 * \return new symbol
 */
inline Symbol _contrib_quantize(const std::string& symbol_name,
                                Symbol data,
                                Symbol min_range,
                                Symbol max_range,
                                _contrib_quantizeOutType out_type = _contrib_quantizeOutType::kUint8) {
  static const char *_contrib_quantizeOutTypeValues[] = {
    "int8",
    "uint8"
  };
  return Operator("_contrib_quantize")
           .SetParam("out_type", _contrib_quantizeOutTypeValues[int(out_type)])
           .SetInput("data", data)
           .SetInput("min_range", min_range)
           .SetInput("max_range", max_range)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Apply a custom operator implemented in a frontend language (like Python).
 *
 *        Custom operators should override required methods like `forward` and `backward`.
 *        The custom operator must be registered before it can be used.
 *        Please check the tutorial here: http://mxnet.io/faq/new_op.html.
 *
 *
 *
 *        Defined in src/operator/custom/custom.cc:L546
 * \param symbol_name name of the resulting symbol
 * \param data Input data for the custom operator.
 * \param op_type Name of the custom operator. This is the name that is passed to
 * \return new symbol
 */
inline Symbol Custom(const std::string& symbol_name,
                     const std::vector<Symbol>& data,
                     const std::string& op_type) {
  return Operator("Custom")
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Batch normalization.
 *
 *        Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
 *        well as offset ``beta``.
 *        Standard BN [1]_ implementation only normalize the data within each device.
 *        SyncBN normalizes the input within the whole mini-batch.
 *        We follow the sync-onece implmentation described in the paper [2]_.
 *
 *        Assume the input has more than one dimension and we normalize along axis 1.
 *        We first compute the mean and variance along this axis:
 *
 *        .. math::
 *
 *        data\_mean[i] = mean(data[:,i,:,...]) \\
 *        data\_var[i] = var(data[:,i,:,...])
 *
 *        Then compute the normalized output, which has the same shape as input, as
 *
 *        .. math::
 *
 *        out[:,i,:,...] = \frac{data[:,i,:,...] -
 *
 *        Both *mean* and *var* returns a scalar by treating the input as a vector.
 *
 *        Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 *        have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
 *        ``data_var`` as well, which are needed for the backward pass.
 *
 *        Besides the inputs and the outputs, this operator accepts two auxiliary
 *        states, ``moving_mean`` and ``moving_var``, which are *k*-length
 *        vectors. They are global statistics for the whole dataset, which are updated
 *        by::
 *
 *        moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
 *        moving_var = moving_var * momentum + data_var * (1 - momentum)
 *
 *        If ``use_global_stats`` is set to be true, then ``moving_mean`` and
 *        ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
 *        the output. It is often used during inference.
 *
 *        Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
 *        then set ``gamma`` to 1 and its gradient to 0.
 *
 *        Reference:
 *        .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating
 *        deep network training by reducing internal covariate shift." *ICML 2015*
 *        .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, \
 *        Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation."
 *
 *
 *        Defined in src/operator/contrib/sync_batch_norm.cc:L97
 * \param symbol_name name of the resulting symbol
 * \param data Input data to batch normalization
 * \param gamma gamma array
 * \param beta beta array
 * \param moving_mean running mean of input
 * \param moving_var running variance of input
 * \param key Hash key for synchronization, please set the same hash key for same layer,
 * \param eps Epsilon to prevent div 0
 * \param momentum Momentum for moving average
 * \param fix_gamma Fix gamma while training
 * \param use_global_stats Whether use global moving statistics instead of local
 * \param output_mean_var Output All,normal mean and var
 * \param ndev The count of GPU devices
 * \return new symbol
 */
inline Symbol _contrib_SyncBatchNorm(const std::string& symbol_name,
                                     Symbol data,
                                     Symbol gamma,
                                     Symbol beta,
                                     Symbol moving_mean,
                                     Symbol moving_var,
                                     const std::string& key,
                                     mx_float eps = 0.00100000005,
                                     mx_float momentum = 0.899999976,
                                     bool fix_gamma = true,
                                     bool use_global_stats = false,
                                     bool output_mean_var = false,
                                     int ndev = 1) {
  return Operator("_contrib_SyncBatchNorm")
           .SetParam("eps", eps)
           .SetParam("momentum", momentum)
           .SetParam("fix_gamma", fix_gamma)
           .SetParam("use_global_stats", use_global_stats)
           .SetParam("output_mean_var", output_mean_var)
           .SetParam("ndev", ndev)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .SetInput("beta", beta)
           .SetInput("moving_mean", moving_mean)
           .SetInput("moving_var", moving_var)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator samples sub-graphs from a csr graph via an
 *        uniform probability. The operator is designed for DGL.
 *
 *        The operator outputs three sets of NDArrays to represent the sampled results
 *        (the number of NDArrays in each set is the same as the number of seed NDArrays):
 *        1) a set of 1D NDArrays containing the sampled vertices, 2) a set of
 *        the sampled edges, 3) a set of 1D NDArrays indicating the layer where a vertex
 *        The first set of 1D NDArrays have a length of max_num_vertices+1. The last
 *        indicate the acutal number of vertices in a subgraph. The third set of NDArrays
 *        of max_num_vertices, and the valid number of vertices is the same as the ones
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        shape = (5, 5)
 *        data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
 *        indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
 *        indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
 *        a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
 *        a.asnumpy()
 *        seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
 *        out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2,
 *
 *        out[0]
 *        [0 1 2 3 4 5]
 *        <NDArray 6 @cpu(0)>
 *
 *        out[1].asnumpy()
 *        array([[ 0,  1,  0,  3,  0],
 *        [ 5,  0,  0,  7,  0],
 *        [ 9,  0,  0, 11,  0],
 *        [13,  0, 15,  0,  0],
 *        [17,  0, 19,  0,  0]])
 *
 *        out[2]
 *        [0 0 0 0 0]
 *        <NDArray 5 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L784
 * \param symbol_name name of the resulting symbol
 * \param csr_matrix csr matrix
 * \param seed_arrays seed vertices
 * \param num_args Number of input NDArray.
 * \param num_hops Number of hops.
 * \param num_neighbor Number of neighbor.
 * \param max_num_vertices Max number of vertices.
 * \return new symbol
 */
inline Symbol _contrib_dgl_csr_neighbor_uniform_sample(const std::string& symbol_name,
                                                       Symbol csr_matrix,
                                                       const std::vector<Symbol>& seed_arrays,
                                                       int num_args,
                                                       int64_t num_hops = 1,
                                                       int64_t num_neighbor = 2,
                                                       int64_t max_num_vertices = 100) {
  return Operator("_contrib_dgl_csr_neighbor_uniform_sample")
           .SetParam("num_args", num_args)
           .SetParam("num_hops", num_hops)
           .SetParam("num_neighbor", num_neighbor)
           .SetParam("max_num_vertices", max_num_vertices)
           .SetInput("csr_matrix", csr_matrix)
(seed_arrays)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator samples sub-graph from a csr graph via an
 *        non-uniform probability. The operator is designed for DGL.
 *
 *        The operator outputs four sets of NDArrays to represent the sampled results
 *        (the number of NDArrays in each set is the same as the number of seed NDArrays):
 *        1) a set of 1D NDArrays containing the sampled vertices, 2) a set of
 *        the sampled edges, 3) a set of 1D NDArrays with the probability that vertices
 *        4) a set of 1D NDArrays indicating the layer where a vertex is sampled.
 *        The first set of 1D NDArrays have a length of max_num_vertices+1. The last
 *        indicate the acutal number of vertices in a subgraph. The third and fourth set
 *        of max_num_vertices, and the valid number of vertices is the same as the ones
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        shape = (5, 5)
 *        prob = mx.nd.array([0.9, 0.8, 0.2, 0.4, 0.1], dtype=np.float32)
 *        data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
 *        indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
 *        indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
 *        a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
 *        seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
 *        out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed,
 *
 *        out[0]
 *        [0 1 2 3 4 5]
 *        <NDArray 6 @cpu(0)>
 *
 *        out[1].asnumpy()
 *        array([[ 0,  1,  2,  0,  0],
 *        [ 5,  0,  6,  0,  0],
 *        [ 9, 10,  0,  0,  0],
 *        [13, 14,  0,  0,  0],
 *        [ 0, 18, 19,  0,  0]])
 *
 *        out[2]
 *        [0.9 0.8 0.2 0.4 0.1]
 *        <NDArray 5 @cpu(0)>
 *
 *        out[3]
 *        [0 0 0 0 0]
 *        <NDArray 5 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L883
 * \param symbol_name name of the resulting symbol
 * \param csr_matrix csr matrix
 * \param probability probability vector
 * \param seed_arrays seed vertices
 * \param num_args Number of input NDArray.
 * \param num_hops Number of hops.
 * \param num_neighbor Number of neighbor.
 * \param max_num_vertices Max number of vertices.
 * \return new symbol
 */
inline Symbol _contrib_dgl_csr_neighbor_non_uniform_sample(const std::string& symbol_name,
                                                           Symbol csr_matrix,
                                                           Symbol probability,
                                                           const std::vector<Symbol>& seed_arrays,
                                                           int num_args,
                                                           int64_t num_hops = 1,
                                                           int64_t num_neighbor = 2,
                                                           int64_t max_num_vertices = 100) {
  return Operator("_contrib_dgl_csr_neighbor_non_uniform_sample")
           .SetParam("num_args", num_args)
           .SetParam("num_hops", num_hops)
           .SetParam("num_neighbor", num_neighbor)
           .SetParam("max_num_vertices", max_num_vertices)
           .SetInput("csr_matrix", csr_matrix)
           .SetInput("probability", probability)
(seed_arrays)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator constructs an induced subgraph for
 *        a given set of vertices from a graph. The operator accepts multiple
 *        sets of vertices as input. For each set of vertices, it returns a pair
 *        of CSR matrices if return_mapping is True: the first matrix contains edges
 *        with new edge Ids, the second matrix contains edges with the original
 *        edge Ids.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        x=[[1, 0, 0, 2],
 *        [3, 0, 4, 0],
 *        [0, 5, 0, 0],
 *        [0, 6, 7, 0]]
 *        v = [0, 1, 2]
 *        dgl_subgraph(x, v, return_mapping=True) =
 *        [[1, 0, 0],
 *        [2, 0, 3],
 *        [0, 4, 0]],
 *        [[1, 0, 0],
 *        [3, 0, 4],
 *        [0, 5, 0]]
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1140
 * \param symbol_name name of the resulting symbol
 * \param graph Input graph where we sample vertices.
 * \param data The input arrays that include data arrays and states.
 * \param num_args Number of input arguments, including all symbol inputs.
 * \param return_mapping Return mapping of vid and eid between the subgraph and the
 * \return new symbol
 */
inline Symbol _contrib_dgl_subgraph(const std::string& symbol_name,
                                    Symbol graph,
                                    const std::vector<Symbol>& data,
                                    int num_args,
                                    bool return_mapping) {
  return Operator("_contrib_dgl_subgraph")
           .SetParam("num_args", num_args)
           .SetParam("return_mapping", return_mapping)
           .SetInput("graph", graph)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator implements the edge_id function for a graph
 *        stored in a CSR matrix (the value of the CSR stores the edge Id of the graph).
 *        output[i] = input[u[i], v[i]] if there is an edge between u[i] and v[i]],
 *        otherwise output[i] will be -1. Both u and v should be 1D vectors.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        x = [[ 1, 0, 0 ],
 *        [ 0, 2, 0 ],
 *        [ 0, 0, 3 ]]
 *        u = [ 0, 0, 1, 1, 2, 2 ]
 *        v = [ 0, 1, 1, 2, 0, 2 ]
 *        edge_id(x, u, v) = [ 1, -1, 2, -1, -1, 3 ]
 *
 *        The storage type of ``edge_id`` output depends on storage types of inputs
 *        - edge_id(csr, default, default) = default
 *        - default and rsp inputs are not supported
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1321
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param u u ndarray
 * \param v v ndarray
 * \return new symbol
 */
inline Symbol _contrib_edge_id(const std::string& symbol_name,
                               Symbol data,
                               Symbol u,
                               Symbol v) {
  return Operator("_contrib_edge_id")
           .SetInput("data", data)
           .SetInput("u", u)
           .SetInput("v", v)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator converts a CSR matrix whose values are edge Ids
 *        to an adjacency matrix whose values are ones. The output CSR matrix always has
 *        the data value of float32.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        x = [[ 1, 0, 0 ],
 *        [ 0, 2, 0 ],
 *        [ 0, 0, 3 ]]
 *        dgl_adjacency(x) =
 *        [[ 1, 0, 0 ],
 *        [ 0, 1, 0 ],
 *        [ 0, 0, 1 ]]
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1393
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \return new symbol
 */
inline Symbol _contrib_dgl_adjacency(const std::string& symbol_name,
                                     Symbol data) {
  return Operator("_contrib_dgl_adjacency")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator compacts a CSR matrix generated by
 *        dgl_csr_neighbor_uniform_sample and dgl_csr_neighbor_non_uniform_sample.
 *        The CSR matrices generated by these two operators may have many empty
 *        rows at the end and many empty columns. This operator removes these
 *        empty rows and empty columns.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        shape = (5, 5)
 *        data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
 *        indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
 *        indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
 *        a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
 *        seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
 *        out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2,
 *        num_neighbor=2, max_num_vertices=6)
 *        subg_v = out[0]
 *        subg = out[1]
 *        compact = mx.nd.contrib.dgl_graph_compact(subg, subg_v,
 *        graph_sizes=(subg_v[-1].asnumpy()[0]), return_mapping=False)
 *
 *        compact.asnumpy()
 *        array([[0, 0, 0, 1, 0],
 *        [2, 0, 3, 0, 0],
 *        [0, 4, 0, 0, 5],
 *        [0, 6, 0, 0, 7],
 *        [8, 9, 0, 0, 0]])
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1582
 * \param symbol_name name of the resulting symbol
 * \param graph_data Input graphs and input vertex Ids.
 * \param num_args Number of input arguments.
 * \param return_mapping Return mapping of vid and eid between the subgraph and the
 * \param graph_sizes the number of vertices in each graph.
 * \return new symbol
 */
inline Symbol _contrib_dgl_graph_compact(const std::string& symbol_name,
                                         const std::vector<Symbol>& graph_data,
                                         int num_args,
                                         bool return_mapping,
                                         nnvm::Tuple<int64_t> graph_sizes) {
  return Operator("_contrib_dgl_graph_compact")
           .SetParam("num_args", num_args)
           .SetParam("return_mapping", return_mapping)
           .SetParam("graph_sizes", graph_sizes)
(graph_data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the Khatri-Rao product of the input matrices.
 *
 *        Given a collection of :math:`n` input matrices,
 *
 *        .. math::
 *        A_1 \in \mathbb{R}^{M_1 \times M}, \ldots, A_n \in \mathbb{R}^{M_n \times N},
 *
 *        the (column-wise) Khatri-Rao product is defined as the matrix,
 *
 *        .. math::
 *        X = A_1 \otimes \cdots \otimes A_n \in \mathbb{R}^{(M_1 \cdots M_n) \times N},
 *
 *        where the :math:`k` th column is equal to the column-wise outer product
 *        :math:`{A_1}_k \otimes \cdots \otimes {A_n}_k` where :math:`{A_i}_k` is the kth
 *        column of the ith matrix.
 *
 *        Example::
 *
 *        >>> A = mx.nd.array([[1, -1],
 *        >>>                  [2, -3]])
 *        >>> B = mx.nd.array([[1, 4],
 *        >>>                  [2, 5],
 *        >>>                  [3, 6]])
 *        >>> C = mx.nd.khatri_rao(A, B)
 *        >>> print(C.asnumpy())
 *        [[  1.  -4.]
 *        [  2.  -5.]
 *        [  3.  -6.]
 *        [  2. -12.]
 *        [  4. -15.]
 *        [  6. -18.]]
 *
 *
 *
 *        Defined in src/operator/contrib/krprod.cc:L108
 * \param symbol_name name of the resulting symbol
 * \param args Positional input matrices
 * \return new symbol
 */
inline Symbol khatri_rao(const std::string& symbol_name,
                         const std::vector<Symbol>& args) {
  return Operator("khatri_rao")
(args)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes the log likelihood of a univariate Hawkes process.
 *
 *        The log likelihood is calculated on point process observations represented
 *        as *ragged* matrices for *lags* (interarrival times w.r.t. the previous point),
 *        and *marks* (identifiers for the process ID). Note that each mark is considered
 *        i.e., computes the joint likelihood of a set of Hawkes processes determined by
 *
 *        .. math::
 *
 *        \lambda_k^*(t) = \lambda_k + \alpha_k \sum_{\{t_i < t, y_i = k\}} \beta_k
 *
 *        where :math:`\lambda_k` specifies the background intensity ``lda``,
 *        :math:`\alpha_k` specifies the *branching ratio* or ``alpha``, and
 *
 *        ``lags`` and ``marks`` are two NDArrays of shape (N, T) and correspond to the
 *        representation of the point process observation, the first dimension
 *        corresponds to the batch index, and the second to the sequence. These are
 *        "left-aligned" *ragged* matrices (the first index of the second dimension is
 *        the beginning of every sequence. The length of each sequence is given by
 *        ``valid_length``, of shape (N,) where ``valid_length[i]`` corresponds to the
 *
 *        ``max_time`` is the length of the observation period of the point process. That
 *        is, specifying ``max_time[i] = 5`` computes the likelihood of the i-th sample
 *        as observed on the time interval :math:`(0, 5]`. Naturally, the sum of all
 *
 *        The input ``state`` specifies the *memory* of the Hawkes process. Invoking the
 *
 *        .. math::
 *
 *        s_k(t) = \sum_{t_i < t} \exp(-\beta_k (t - t_i)).
 *
 *        The ``state`` to be provided is :math:`s_k(0)` and carries the added intensity
 *        due to past events before the current batch. :math:`s_k(T)` is returned from
 *
 *        Example::
 *
 *        # define the Hawkes process parameters
 *        lda = nd.array([1.5, 2.0, 3.0]).tile((N, 1))
 *        alpha = nd.array([0.2, 0.3, 0.4])  # branching ratios should be < 1
 *        beta = nd.array([1.0, 2.0, 3.0])
 *
 *        # the "data", or observations
 *        ia_times = nd.array([[6, 7, 8, 9], [1, 2, 3, 4], [3, 4, 5, 6], [8, 9, 10, 11]])
 *        marks = nd.zeros((N, T)).astype(np.int32)
 *
 *        # starting "state" of the process
 *        states = nd.zeros((N, K))
 *
 *        valid_length = nd.array([1, 2, 3, 4])  # number of valid points in each sequence
 *        max_time = nd.ones((N,)) * 100.0  # length of the observation period
 *
 *        A = nd.contrib.hawkesll(
 *        lda, alpha, beta, states, ia_times, marks, valid_length, max_time
 *        )
 *
 *        References:
 *
 *        -  Bacry, E., Mastromatteo, I., & Muzy, J. F. (2015).
 *        Hawkes processes in finance. Market Microstructure and Liquidity
 *        , 1(01), 1550005.
 *
 *
 *        Defined in src/operator/contrib/hawkes_ll.cc:L84
 * \param symbol_name name of the resulting symbol
 * \param lda Shape (N, K) The intensity for each of the K processes, for each sample
 * \param alpha Shape (K,) The infectivity factor (branching ratio) for each process
 * \param beta Shape (K,) The decay parameter for each process
 * \param state Shape (N, K) the Hawkes state for each process
 * \param lags Shape (N, T) the interarrival times
 * \param marks Shape (N, T) the marks (process ids)
 * \param valid_length The number of valid points in the process
 * \param max_time the length of the interval where the processes were sampled
 * \return new symbol
 */
inline Symbol _contrib_hawkesll(const std::string& symbol_name,
                                Symbol lda,
                                Symbol alpha,
                                Symbol beta,
                                Symbol state,
                                Symbol lags,
                                Symbol marks,
                                Symbol valid_length,
                                Symbol max_time) {
  return Operator("_contrib_hawkesll")
           .SetInput("lda", lda)
           .SetInput("alpha", alpha)
           .SetInput("beta", beta)
           .SetInput("state", state)
           .SetInput("lags", lags)
           .SetInput("marks", marks)
           .SetInput("valid_length", valid_length)
           .SetInput("max_time", max_time)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _contrib_backward_hawkesll(const std::string& symbol_name) {
  return Operator("_contrib_backward_hawkesll")
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Number of stored values for a sparse tensor, including explicit zeros.
 *
 *        This operator only supports CSR matrix on CPU.
 *
 *
 *
 *        Defined in src/operator/contrib/nnz.cc:L177
 * \param symbol_name name of the resulting symbol
 * \param data Input
 * \param axis Select between the number of values across the whole matrix, in each
 * \return new symbol
 */
inline Symbol _contrib_getnnz(const std::string& symbol_name,
                              Symbol data,
                              dmlc::optional<int> axis = dmlc::optional<int>()) {
  return Operator("_contrib_getnnz")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operator implements the gradient multiplier function.
 *        In forward pass it acts as an identity transform. During backpropagation it
 *        multiplies the gradient from the subsequent level by a scalar factor lambda and
 *        the preceding layer.
 *
 *
 *        Defined in src/operator/contrib/gradient_multiplier_op.cc:L78
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \param scalar lambda multiplier
 * \return new symbol
 */
inline Symbol _contrib_gradientmultiplier(const std::string& symbol_name,
                                          Symbol data,
                                          mx_float scalar) {
  return Operator("_contrib_gradientmultiplier")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _contrib_backward_gradientmultiplier(const std::string& symbol_name,
                                                   Symbol data,
                                                   mx_float scalar) {
  return Operator("_contrib_backward_gradientmultiplier")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for multi-precision AdamW optimizer.
 *
 *        AdamW is seen as a modification of Adam by decoupling the weight decay from the
 *        optimization steps taken w.r.t. the loss function.
 *
 *        Adam update consists of the following steps, where g represents gradient and m,
 *        are 1st and 2nd order moment estimates (mean and variance).
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
 *        v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
 *        W_t = W_{t-1} - \eta_t (\alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon } + wd
 *
 *        It updates the weights using::
 *
 *        m = beta1*m + (1-beta1)*grad
 *        v = beta2*v + (1-beta2)*(grad**2)
 *        w -= eta * (learning_rate * m / (sqrt(v) + epsilon) + w * wd)
 *
 *        Note that gradient is rescaled to grad = rescale_grad * grad. If rescale_grad
 *        the update is skipped.
 *
 *
 *        Defined in src/operator/contrib/adamw.cc:L77
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mean Moving mean
 * \param var Moving variance
 * \param weight32 Weight32
 * \param rescale_grad Rescale gradient to rescale_grad * grad. If NaN, Inf, or 0, the
 * \param lr Learning rate
 * \param eta Learning rate schedule multiplier
 * \param beta1 The decay rate for the 1st moment estimates.
 * \param beta2 The decay rate for the 2nd moment estimates.
 * \param epsilon A small constant for numerical stability.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol _mp_adamw_update(const std::string& symbol_name,
                               Symbol weight,
                               Symbol grad,
                               Symbol mean,
                               Symbol var,
                               Symbol weight32,
                               Symbol rescale_grad,
                               mx_float lr,
                               mx_float eta,
                               mx_float beta1 = 0.899999976,
                               mx_float beta2 = 0.999000013,
                               mx_float epsilon = 9.99999994e-09,
                               mx_float wd = 0,
                               mx_float clip_gradient = -1) {
  return Operator("_mp_adamw_update")
           .SetParam("lr", lr)
           .SetParam("eta", eta)
           .SetParam("beta1", beta1)
           .SetParam("beta2", beta2)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mean", mean)
           .SetInput("var", var)
           .SetInput("weight32", weight32)
           .SetInput("rescale_grad", rescale_grad)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for AdamW optimizer. AdamW is seen as a modification of
 *        Adam by decoupling the weight decay from the optimization steps taken w.r.t.
 *
 *        Adam update consists of the following steps, where g represents gradient and m,
 *        are 1st and 2nd order moment estimates (mean and variance).
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
 *        v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
 *        W_t = W_{t-1} - \eta_t (\alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon } + wd
 *
 *        It updates the weights using::
 *
 *        m = beta1*m + (1-beta1)*grad
 *        v = beta2*v + (1-beta2)*(grad**2)
 *        w -= eta * (learning_rate * m / (sqrt(v) + epsilon) + w * wd)
 *
 *        Note that gradient is rescaled to grad = rescale_grad * grad. If rescale_grad
 *        the update is skipped.
 *
 *
 *        Defined in src/operator/contrib/adamw.cc:L120
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mean Moving mean
 * \param var Moving variance
 * \param rescale_grad Rescale gradient to rescale_grad * grad. If NaN, Inf, or 0, the
 * \param lr Learning rate
 * \param eta Learning rate schedule multiplier
 * \param beta1 The decay rate for the 1st moment estimates.
 * \param beta2 The decay rate for the 2nd moment estimates.
 * \param epsilon A small constant for numerical stability.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol _adamw_update(const std::string& symbol_name,
                            Symbol weight,
                            Symbol grad,
                            Symbol mean,
                            Symbol var,
                            Symbol rescale_grad,
                            mx_float lr,
                            mx_float eta,
                            mx_float beta1 = 0.899999976,
                            mx_float beta2 = 0.999000013,
                            mx_float epsilon = 9.99999994e-09,
                            mx_float wd = 0,
                            mx_float clip_gradient = -1) {
  return Operator("_adamw_update")
           .SetParam("lr", lr)
           .SetParam("eta", eta)
           .SetParam("beta1", beta1)
           .SetParam("beta2", beta2)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mean", mean)
           .SetInput("var", var)
           .SetInput("rescale_grad", rescale_grad)
           .CreateSymbol(symbol_name);
}

/*! \brief resizing mode. "simple" - output height equals parameter "height" if
 *        "scale_height" parameter is not defined or input height multiplied by
 *        "scale_height" otherwise. Same for width;"odd_scale" - if original height or
 *        width is odd, then result height is calculated like result_h = (original_h - 1)
 *        * scale + 1; for scale > 1 the result shape would be like if we did
 *        deconvolution with kernel = (1, 1) and stride = (height_scale, width_scale);
 *        and for scale < 1 shape would be like we did convolution with kernel = (1, 1)
 *        and stride = (int(1 / height_scale), int( 1/ width_scale);"like" - resize first
 *        input to the height and width of second input; "to_even_down" - resize input to
 *        nearest lower even height and width (if original height is odd then result
 *        height = original height - 1);"to_even_up" - resize input to nearest bigger
 *        even height and width (if original height is odd then result height = original
 *        height + 1);"to_odd_down" - resize input to nearest odd height and width (if
 *        original height is odd then result height = original height - 1);"to_odd_up" -
 *        resize input to nearest odd height and width (if original height is odd then
 */
enum class _contrib_BilinearResize2DMode {
  kLike = 0,
  kOdd_scale = 1,
  kSize = 2,
  kTo_even_down = 3,
  kTo_even_up = 4,
  kTo_odd_down = 5,
  kTo_odd_up = 6
};

/*!
 * \brief
 *        Perform 2D resizing (upsampling or downsampling) for 4D input using bilinear
 *
 *        Expected input is a 4 dimensional NDArray (NCHW) and the output
 *        with the shape of (N x C x height x width).
 *        The key idea of bilinear interpolation is to perform linear interpolation
 *        first in one direction, and then again in the other direction. See the
 *        `Bilinear interpolation
 *        for more details.
 *
 *
 *        Defined in src/operator/contrib/bilinear_resize.cc:L193
 * \param symbol_name name of the resulting symbol
 * \param data Input data
 * \param like Resize data to it's shape
 * \param height output height (required, but ignored if scale_height is defined or mode
 * \param width output width (required, but ignored if scale_width is defined or mode is
 * \param scale_height sampling scale of the height (optional, used in modes "scale" and
 * \param scale_width sampling scale of the width (optional, used in modes "scale" and
 * \param mode resizing mode. "simple" - output height equals parameter "height" if
 *        "scale_height" parameter is not defined or input height multiplied by
 *        "scale_height" otherwise. Same for width;"odd_scale" - if original height or
 *        width is odd, then result height is calculated like result_h = (original_h - 1)
 *        * scale + 1; for scale > 1 the result shape would be like if we did
 *        deconvolution with kernel = (1, 1) and stride = (height_scale, width_scale);
 *        and for scale < 1 shape would be like we did convolution with kernel = (1, 1)
 *        and stride = (int(1 / height_scale), int( 1/ width_scale);"like" - resize first
 *        input to the height and width of second input; "to_even_down" - resize input to
 *        nearest lower even height and width (if original height is odd then result
 *        height = original height - 1);"to_even_up" - resize input to nearest bigger
 *        even height and width (if original height is odd then result height = original
 *        height + 1);"to_odd_down" - resize input to nearest odd height and width (if
 *        original height is odd then result height = original height - 1);"to_odd_up" -
 *        resize input to nearest odd height and width (if original height is odd then
 * \return new symbol
 */
inline Symbol _contrib_BilinearResize2D(const std::string& symbol_name,
                                        Symbol data,
                                        Symbol like,
                                        int height = 1,
                                        int width = 1,
                                        mx_float scale_height = mx_float(),
                                        mx_float scale_width = mx_float(),
                                        _contrib_BilinearResize2DMode mode = _contrib_BilinearResize2DMode::kSize) {
  static const char *_contrib_BilinearResize2DModeValues[] = {
    "like",
    "odd_scale",
    "size",
    "to_even_down",
    "to_even_up",
    "to_odd_down",
    "to_odd_up"
  };
  return Operator("_contrib_BilinearResize2D")
           .SetParam("height", height)
           .SetParam("width", width)
           .SetParam("scale_height", scale_height)
           .SetParam("scale_width", scale_width)
           .SetParam("mode", _contrib_BilinearResize2DModeValues[int(mode)])
           .SetInput("data", data)
           .SetInput("like", like)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief This operators implements the quadratic function.
 *
 *        .. math::
 *        f(x) = ax^2+bx+c
 *
 *        where :math:`x` is an input tensor and all operations
 *        in the function are element-wise.
 *
 *        Example::
 *
 *        x = [[1, 2], [3, 4]]
 *        y = quadratic(data=x, a=1, b=2, c=3)
 *        y = [[6, 11], [18, 27]]
 *
 *        The storage type of ``quadratic`` output depends on storage types of inputs
 *        - quadratic(csr, a, b, 0) = csr
 *        - quadratic(default, a, b, c) = default
 *
 *
 *
 *        Defined in src/operator/contrib/quadratic_op.cc:L50
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param a Coefficient of the quadratic term in the quadratic function.
 * \param b Coefficient of the linear term in the quadratic function.
 * \param c Constant term in the quadratic function.
 * \return new symbol
 */
inline Symbol _contrib_quadratic(const std::string& symbol_name,
                                 Symbol data,
                                 mx_float a = 0,
                                 mx_float b = 0,
                                 mx_float c = 0) {
  return Operator("_contrib_quadratic")
           .SetParam("a", a)
           .SetParam("b", b)
           .SetParam("c", c)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _contrib_backward_quadratic(const std::string& symbol_name) {
  return Operator("_contrib_backward_quadratic")
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Rescale the input by the square root of the channel dimension.
 *
 *        out = data / sqrt(data.shape[-1])
 *
 *
 *
 *        Defined in src/operator/contrib/transformer.cc:L38
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \return new symbol
 */
inline Symbol _contrib_div_sqrt_dim(const std::string& symbol_name,
                                    Symbol data) {
  return Operator("_contrib_div_sqrt_dim")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns an array of indexes of the input array.
 *
 *        For an input array with shape  :math:`(d_1, d_2, ..., d_n)`, `index_array`
 *        :math:`(d_1, d_2, ..., d_n, n)` array `idx`, where
 *        :math:`idx[i_1, i_2, ..., i_n, :] = [i_1, i_2, ..., i_n]`.
 *
 *        Additionally, when the parameter `axes` is specified, `idx` will be a
 *        :math:`(d_1, d_2, ..., d_n, m)` array where `m` is the length of `axes`, and
 *        equality will hold: :math:`idx[i_1, i_2, ..., i_n, j] = i_{axes[j]}`.
 *
 *        Examples::
 *
 *        x = mx.nd.ones((3, 2))
 *
 *        mx.nd.contrib.index_array(x) = [[[0 0]
 *        [0 1]]
 *
 *        [[1 0]
 *        [1 1]]
 *
 *        [[2 0]
 *        [2 1]]]
 *
 *        x = mx.nd.ones((3, 2, 2))
 *
 *        mx.nd.contrib.index_array(x, axes=(1, 0)) = [[[[0 0]
 *        [0 0]]
 *
 *        [[1 0]
 *        [1 0]]]
 *
 *
 *        [[[0 1]
 *        [0 1]]
 *
 *        [[1 1]
 *        [1 1]]]
 *
 *
 *        [[[0 2]
 *        [0 2]]
 *
 *        [[1 2]
 *        [1 2]]]]
 *
 *
 *
 *        Defined in src/operator/contrib/index_array.cc:L118
 * \param symbol_name name of the resulting symbol
 * \param data Input data
 * \param axes The axes to include in the index array. Supports negative values.
 * \return new symbol
 */
inline Symbol _contrib_index_array(const std::string& symbol_name,
                                   Symbol data,
                                   dmlc::optional<Shape> axes = dmlc::optional<Shape>()) {
  return Operator("_contrib_index_array")
           .SetParam("axes", axes)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Copies the elements of a `new_tensor` into the `old_tensor`.
 *
 *        This operator copies the elements by selecting the indices in the order given
 *        The output will be a new tensor containing the rest elements of old tensor and
 *        the copied elements of new tensor.
 *        For example, if `index[i] == j`, then the `i` th row of `new_tensor` is copied
 *        `j` th row of output.
 *
 *        The `index` must be a vector and it must have the same size with the `0` th
 *        `new_tensor`. Also, the `0` th dimension of old_tensor must `>=` the `0` th
 *        `new_tensor`, or an error will be raised.
 *
 *        Examples::
 *
 *        x = mx.nd.zeros((5,3))
 *        t = mx.nd.array([[1,2,3],[4,5,6],[7,8,9]])
 *        index = mx.nd.array([0,4,2])
 *
 *        mx.nd.contrib.index_copy(x, index, t)
 *
 *        [[1. 2. 3.]
 *        [0. 0. 0.]
 *        [7. 8. 9.]
 *        [0. 0. 0.]
 *        [4. 5. 6.]]
 *        <NDArray 5x3 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/index_copy.cc:L183
 * \param symbol_name name of the resulting symbol
 * \param old_tensor Old tensor
 * \param index_vector Index vector
 * \param new_tensor New tensor to be copied
 * \return new symbol
 */
inline Symbol _contrib_index_copy(const std::string& symbol_name,
                                  Symbol old_tensor,
                                  Symbol index_vector,
                                  Symbol new_tensor) {
  return Operator("_contrib_index_copy")
           .SetInput("old_tensor", old_tensor)
           .SetInput("index_vector", index_vector)
           .SetInput("new_tensor", new_tensor)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _contrib_backward_index_copy(const std::string& symbol_name) {
  return Operator("_contrib_backward_index_copy")
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *        This operator takes a 4D feature map as an input array and region proposals as
 *        then align the feature map over sub-regions of input and produces a fixed-sized
 *        This operator is typically used in Faster R-CNN & Mask R-CNN networks.
 *
 *        Different from ROI pooling, ROI Align removes the harsh quantization, properly
 *        the extracted features with the input. RoIAlign computes the value of each
 *        by bilinear interpolation from the nearby grid points on the feature map. No
 *        performed on any coordinates involved in the RoI, its bins, or the sampling
 *        Bilinear interpolation is used to compute the exact values of the
 *        input features at four regularly sampled locations in each RoI bin.
 *        Then the feature map can be aggregated by avgpooling.
 *
 *
 *        References
 *        ----------
 *
 *        He, Kaiming, et al. "Mask R-CNN." ICCV, 2017
 *
 *
 *        Defined in src/operator/contrib/roi_align.cc:L538
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the pooling operator, a 4D Feature maps
 * \param rois Bounding box coordinates, a 2D array
 * \param pooled_size ROI Align output roi feature map height and width: (h, w)
 * \param spatial_scale Ratio of input feature map height (or w) to raw image height (or
 * \param sample_ratio Optional sampling ratio of ROI align, using adaptive size by
 * \param position_sensitive Whether to perform position-sensitive RoI pooling.
 *        PSRoIPooling is first proposaled by R-FCN and it can reduce the input channels
 * \return new symbol
 */
inline Symbol _contrib_ROIAlign(const std::string& symbol_name,
                                Symbol data,
                                Symbol rois,
                                Shape pooled_size,
                                mx_float spatial_scale,
                                int sample_ratio = -1,
                                bool position_sensitive = false) {
  return Operator("_contrib_ROIAlign")
           .SetParam("pooled_size", pooled_size)
           .SetParam("spatial_scale", spatial_scale)
           .SetParam("sample_ratio", sample_ratio)
           .SetParam("position_sensitive", position_sensitive)
           .SetInput("data", data)
           .SetInput("rois", rois)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Check if all the float numbers in the array are finite (used for AMP)
 *
 *
 *        Defined in src/operator/contrib/all_finite.cc:L101
 * \param symbol_name name of the resulting symbol
 * \param data Array
 * \param init_output Initialize output to 1.
 * \return new symbol
 */
inline Symbol all_finite(const std::string& symbol_name,
                         Symbol data,
                         bool init_output = true) {
  return Operator("all_finite")
           .SetParam("init_output", init_output)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Check if all the float numbers in all the arrays are finite (used for AMP)
 *
 *
 *        Defined in src/operator/contrib/all_finite.cc:L133
 * \param symbol_name name of the resulting symbol
 * \param data Arrays
 * \param num_arrays Number of arrays.
 * \param init_output Initialize output to 1.
 * \return new symbol
 */
inline Symbol multi_all_finite(const std::string& symbol_name,
                               const std::vector<Symbol>& data,
                               int num_arrays = 1,
                               bool init_output = true) {
  return Operator("multi_all_finite")
           .SetParam("num_arrays", num_arrays)
           .SetParam("init_output", init_output)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for Group AdaGrad optimizer.
 *
 *        Referenced from *Adaptive Subgradient Methods for Online Learning and
 *        and available at http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf but
 *        uses only a single learning rate for every row of the parameter array.
 *
 *        Updates are applied by::
 *
 *        grad = clip(grad * rescale_grad, clip_gradient)
 *        history += mean(square(grad), axis=1, keepdims=True)
 *        div = grad / sqrt(history + float_stable_eps)
 *        weight -= div * lr
 *
 *        Weights are updated lazily if the gradient is sparse.
 *
 *        Note that non-zero values for the weight decay option are not supported.
 *
 *
 *
 *        Defined in src/operator/contrib/optimizer_op.cc:L71
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param history History
 * \param lr Learning rate
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param epsilon Epsilon for numerical stability
 * \return new symbol
 */
inline Symbol _contrib_group_adagrad_update(const std::string& symbol_name,
                                            Symbol weight,
                                            Symbol grad,
                                            Symbol history,
                                            mx_float lr,
                                            mx_float rescale_grad = 1,
                                            mx_float clip_gradient = -1,
                                            mx_float epsilon = 9.99999975e-06) {
  return Operator("_contrib_group_adagrad_update")
           .SetParam("lr", lr)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("epsilon", epsilon)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("history", history)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *        Given an n-d NDArray data, and a 1-d NDArray index,
 *        the operator produces an un-predeterminable shaped n-d NDArray out,
 *        which stands for the rows in x where the corresonding element in index is
 *
 *        >>> data = mx.nd.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
 *        >>> index = mx.nd.array([0, 1, 0])
 *        >>> out = mx.nd.contrib.boolean_mask(data, index)
 *        >>> out
 *
 *        [[4. 5. 6.]]
 *        <NDArray 1x3 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/boolean_mask.cc:L211
 * \param symbol_name name of the resulting symbol
 * \param data Data
 * \param index Mask
 * \param axis An integer that represents the axis in NDArray to mask from.
 * \return new symbol
 */
inline Symbol _contrib_boolean_mask(const std::string& symbol_name,
                                    Symbol data,
                                    Symbol index,
                                    int axis = 0) {
  return Operator("_contrib_boolean_mask")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .SetInput("index", index)
           .CreateSymbol(symbol_name);
}

/*! \brief The input box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 */
enum class _contrib_box_nmsInFormat {
  kCenter = 0,
  kCorner = 1
};

/*! \brief The output box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 */
enum class _contrib_box_nmsOutFormat {
  kCenter = 0,
  kCorner = 1
};

/*!
 * \brief Apply non-maximum suppression to input.
 *
 *        The output will be sorted in descending order according to `score`. Boxes with
 *        overlaps larger than `overlap_thresh`, smaller scores and background boxes
 *        will be removed and filled with -1, the corresponding position will be recorded
 *        for backward propogation.
 *
 *        During back-propagation, the gradient will be copied to the original
 *        position according to the input index. For positions that have been suppressed,
 *        the in_grad will be assigned 0.
 *        In summary, gradients are sticked to its boxes, will either be moved or
 *        according to its original index in input.
 *
 *        Input requirements::
 *
 *        1. Input tensor have at least 2 dimensions, (n, k), any higher dims will be
 *        as batch, e.g. (a, b, c, d, n, k) == (a*b*c*d, n, k)
 *        2. n is the number of boxes in each batch
 *        3. k is the width of each box item.
 *
 *        By default, a box is [id, score, xmin, ymin, xmax, ymax, ...],
 *        additional elements are allowed.
 *
 *        - `id_index`: optional, use -1 to ignore, useful if `force_suppress=False`,
 *        we will skip highly overlapped boxes if one is `apple` while the other is `car`.
 *
 *        - `background_id`: optional, default=-1, class id for background boxes, useful
 *        when `id_index >= 0` which means boxes with background id will be filtered
 *
 *        - `coord_start`: required, default=2, the starting index of the 4 coordinates.
 *        Two formats are supported:
 *
 *        - `corner`: [xmin, ymin, xmax, ymax]
 *
 *        - `center`: [x, y, width, height]
 *
 *        - `score_index`: required, default=1, box score/confidence.
 *        When two boxes overlap IOU > `overlap_thresh`, the one with smaller score will
 *
 *        - `in_format` and `out_format`: default='corner', specify in/out box formats.
 *
 *        Examples::
 *
 *        x = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [1, 0.4, 0.1, 0.1, 0.2, 0.2],
 *        [0, 0.3, 0.1, 0.1, 0.14, 0.14], [2, 0.6, 0.5, 0.5, 0.7, 0.8]]
 *        box_nms(x, overlap_thresh=0.1, coord_start=2, score_index=1, id_index=0,
 *        force_suppress=True, in_format='corner', out_typ='corner') =
 *        [[2, 0.6, 0.5, 0.5, 0.7, 0.8], [0, 0.5, 0.1, 0.1, 0.2, 0.2],
 *        [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]
 *        out_grad = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
 *        [0.3, 0.3, 0.3, 0.3, 0.3, 0.3], [0.4, 0.4, 0.4, 0.4, 0.4, 0.4]]
 *        # exe.backward
 *        in_grad = [[0.2, 0.2, 0.2, 0.2, 0.2, 0.2], [0, 0, 0, 0, 0, 0],
 *        [0, 0, 0, 0, 0, 0], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]
 *
 *
 *
 *        Defined in src/operator/contrib/bounding_box.cc:L93
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param overlap_thresh Overlapping(IoU) threshold to suppress object with smaller score.
 * \param valid_thresh Filter input boxes to those whose scores greater than valid_thresh.
 * \param topk Apply nms to topk boxes with descending scores, -1 to no restriction.
 * \param coord_start Start index of the consecutive 4 coordinates.
 * \param score_index Index of the scores/confidence of boxes.
 * \param id_index Optional, index of the class categories, -1 to disable.
 * \param background_id Optional, id of the background class which will be ignored in nms.
 * \param force_suppress Optional, if set false and id_index is provided, nms will only
 * \param in_format The input box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 * \param out_format The output box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 * \return new symbol
 */
inline Symbol _contrib_box_nms(const std::string& symbol_name,
                               Symbol data,
                               mx_float overlap_thresh = 0.5,
                               mx_float valid_thresh = 0,
                               int topk = -1,
                               int coord_start = 2,
                               int score_index = 1,
                               int id_index = -1,
                               int background_id = -1,
                               bool force_suppress = false,
                               _contrib_box_nmsInFormat in_format = _contrib_box_nmsInFormat::kCorner,
                               _contrib_box_nmsOutFormat out_format = _contrib_box_nmsOutFormat::kCorner) {
  static const char *_contrib_box_nmsInFormatValues[] = {
    "center",
    "corner"
  };
  static const char *_contrib_box_nmsOutFormatValues[] = {
    "center",
    "corner"
  };
  return Operator("_contrib_box_nms")
           .SetParam("overlap_thresh", overlap_thresh)
           .SetParam("valid_thresh", valid_thresh)
           .SetParam("topk", topk)
           .SetParam("coord_start", coord_start)
           .SetParam("score_index", score_index)
           .SetParam("id_index", id_index)
           .SetParam("background_id", background_id)
           .SetParam("force_suppress", force_suppress)
           .SetParam("in_format", _contrib_box_nmsInFormatValues[int(in_format)])
           .SetParam("out_format", _contrib_box_nmsOutFormatValues[int(out_format)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief The box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 */
enum class _contrib_box_iouFormat {
  kCenter = 0,
  kCorner = 1
};

/*!
 * \brief Bounding box overlap of two arrays.
 *        The overlap is defined as Intersection-over-Union, aka, IOU.
 *        - lhs: (a_1, a_2, ..., a_n, 4) array
 *        - rhs: (b_1, b_2, ..., b_n, 4) array
 *        - output: (a_1, a_2, ..., a_n, b_1, b_2, ..., b_n) array
 *
 *        Note::
 *
 *        Zero gradients are back-propagated in this op for now.
 *
 *        Example::
 *
 *        x = [[0.5, 0.5, 1.0, 1.0], [0.0, 0.0, 0.5, 0.5]]
 *        y = [[0.25, 0.25, 0.75, 0.75]]
 *        box_iou(x, y, format='corner') = [[0.1428], [0.1428]]
 *
 *
 *
 *        Defined in src/operator/contrib/bounding_box.cc:L134
 * \param symbol_name name of the resulting symbol
 * \param lhs The first input
 * \param rhs The second input
 * \param format The box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 * \return new symbol
 */
inline Symbol _contrib_box_iou(const std::string& symbol_name,
                               Symbol lhs,
                               Symbol rhs,
                               _contrib_box_iouFormat format = _contrib_box_iouFormat::kCorner) {
  static const char *_contrib_box_iouFormatValues[] = {
    "center",
    "corner"
  };
  return Operator("_contrib_box_iou")
           .SetParam("format", _contrib_box_iouFormatValues[int(format)])
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Compute bipartite matching.
 *        The matching is performed on score matrix with shape [B, N, M]
 *        - B: batch_size
 *        - N: number of rows to match
 *        - M: number of columns as reference to be matched against.
 *
 *        Returns:
 *        x : matched column indices. -1 indicating non-matched elements in rows.
 *        y : matched row indices.
 *
 *        Note::
 *
 *        Zero gradients are back-propagated in this op for now.
 *
 *        Example::
 *
 *        s = [[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]]
 *        x, y = bipartite_matching(x, threshold=1e-12, is_ascend=False)
 *        x = [1, -1, 0]
 *        y = [2, 0]
 *
 *
 *
 *        Defined in src/operator/contrib/bounding_box.cc:L180
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param threshold Ignore matching when score < thresh, if is_ascend=false, or ignore
 * \param is_ascend Use ascend order for scores instead of descending. Please set
 * \param topk Limit the number of matches to topk, set -1 for no limit
 * \return new symbol
 */
inline Symbol _contrib_bipartite_matching(const std::string& symbol_name,
                                          Symbol data,
                                          mx_float threshold,
                                          bool is_ascend = false,
                                          int topk = -1) {
  return Operator("_contrib_bipartite_matching")
           .SetParam("threshold", threshold)
           .SetParam("is_ascend", is_ascend)
           .SetParam("topk", topk)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *        Applies a 2D adaptive average pooling over a 4D input with the shape of (NCHW).
 *        The pooling kernel and stride sizes are automatically chosen for desired output
 *
 *        - If a single integer is provided for output_size, the output size is \
 *        (N x C x output_size x output_size) for any input (NCHW).
 *
 *        - If a tuple of integers (height, width) are provided for output_size, the
 *        (N x C x height x width) for any input (NCHW).
 *
 *
 *
 *        Defined in src/operator/contrib/adaptive_avg_pooling.cc:L214
 * \param symbol_name name of the resulting symbol
 * \param data Input data
 * \param output_size int (output size) or a tuple of int for output (height, width).
 * \return new symbol
 */
inline Symbol _contrib_AdaptiveAvgPooling2D(const std::string& symbol_name,
                                            Symbol data,
                                            Shape output_size = {}) {
  return Operator("_contrib_AdaptiveAvgPooling2D")
           .SetParam("output_size", output_size)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *        Calculate the mean and variance of `data`.
 *
 *        The mean and variance are calculated by aggregating the contents of data across
 *        If x is 1-D and axes = [0] this is just the mean and variance of a vector.
 *
 *        Example:
 *
 *        x = [[1, 2, 3], [4, 5, 6]]
 *        mean, var = moments(data=x, axes=[0])
 *        mean = [2.5, 3.5, 4.5]
 *        var = [2.25, 2.25, 2.25]
 *        mean, var = moments(data=x, axes=[1])
 *        mean = [2.0, 5.0]
 *        var = [0.66666667, 0.66666667]
 *        mean, var = moments(data=x, axis=[0, 1])
 *        mean = [3.5]
 *        var = [2.9166667]
 *
 *
 *
 *        Defined in src/operator/nn/moments.cc:L54
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param axes Array of ints. Axes along which to compute mean and variance.
 * \param keepdims produce moments with the same dimensionality as the input.
 * \return new symbol
 */
inline Symbol moments(const std::string& symbol_name,
                      Symbol data,
                      dmlc::optional<Shape> axes = dmlc::optional<Shape>(),
                      bool keepdims = false) {
  return Operator("moments")
           .SetParam("axes", axes)
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to the same as
 */
enum class SoftmaxDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Applies the softmax function.
 *
 *        The resulting array contains elements in the range (0,1) and the elements along
 *
 *        .. math::
 *        softmax(\mathbf{z/t})_j = \frac{e^{z_j/t}}{\sum_{k=1}^K e^{z_k/t}}
 *
 *        for :math:`j = 1, ..., K`
 *
 *        t is the temperature parameter in softmax function. By default, t equals 1.0
 *
 *        Example::
 *
 *        x = [[ 1.  1.  1.]
 *        [ 1.  1.  1.]]
 *
 *        softmax(x,axis=0) = [[ 0.5  0.5  0.5]
 *        [ 0.5  0.5  0.5]]
 *
 *        softmax(x,axis=1) = [[ 0.33333334,  0.33333334,  0.33333334],
 *        [ 0.33333334,  0.33333334,  0.33333334]]
 *
 *
 *
 *        Defined in src/operator/nn/softmax.cc:L93
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \param axis The axis along which to compute softmax.
 * \param temperature Temperature parameter in softmax
 * \param dtype DType of the output in case this can't be inferred. Defaults to the same
 * \return new symbol
 */
inline Symbol softmax(const std::string& symbol_name,
                      Symbol data,
                      int axis = -1,
                      dmlc::optional<double> temperature = dmlc::optional<double>(),
                      SoftmaxDtype dtype = SoftmaxDtype::kNone) {
  static const char *SoftmaxDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("softmax")
           .SetParam("axis", axis)
           .SetParam("temperature", temperature)
           .SetParam("dtype", SoftmaxDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to the same as
 */
enum class SoftminDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Applies the softmin function.
 *
 *        The resulting array contains elements in the range (0,1) and the elements along
 *        up to 1.
 *
 *        .. math::
 *        softmin(\mathbf{z/t})_j = \frac{e^{-z_j/t}}{\sum_{k=1}^K e^{-z_k/t}}
 *
 *        for :math:`j = 1, ..., K`
 *
 *        t is the temperature parameter in softmax function. By default, t equals 1.0
 *
 *        Example::
 *
 *        x = [[ 1.  2.  3.]
 *        [ 3.  2.  1.]]
 *
 *        softmin(x,axis=0) = [[ 0.88079703,  0.5,  0.11920292],
 *        [ 0.11920292,  0.5,  0.88079703]]
 *
 *        softmin(x,axis=1) = [[ 0.66524094,  0.24472848,  0.09003057],
 *        [ 0.09003057,  0.24472848,  0.66524094]]
 *
 *
 *
 *        Defined in src/operator/nn/softmax.cc:L153
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \param axis The axis along which to compute softmax.
 * \param temperature Temperature parameter in softmax
 * \param dtype DType of the output in case this can't be inferred. Defaults to the same
 * \return new symbol
 */
inline Symbol softmin(const std::string& symbol_name,
                      Symbol data,
                      int axis = -1,
                      dmlc::optional<double> temperature = dmlc::optional<double>(),
                      SoftminDtype dtype = SoftminDtype::kNone) {
  static const char *SoftminDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("softmin")
           .SetParam("axis", axis)
           .SetParam("temperature", temperature)
           .SetParam("dtype", SoftminDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief DType of the output in case this can't be inferred. Defaults to the same as
 */
enum class Log_softmaxDtype {
  kNone = 0,
  kFloat16 = 1,
  kFloat32 = 2,
  kFloat64 = 3
};

/*!
 * \brief Computes the log softmax of the input.
 *        This is equivalent to computing softmax followed by log.
 *
 *        Examples::
 *
 *        >>> x = mx.nd.array([1, 2, .1])
 *        >>> mx.nd.log_softmax(x).asnumpy()
 *        array([-1.41702998, -0.41702995, -2.31702995], dtype=float32)
 *
 *        >>> x = mx.nd.array( [[1, 2, .1],[.1, 2, 1]] )
 *        >>> mx.nd.log_softmax(x, axis=0).asnumpy()
 *        array([[-0.34115392, -0.69314718, -1.24115396],
 *        [-1.24115396, -0.69314718, -0.34115392]], dtype=float32)
 *
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \param axis The axis along which to compute softmax.
 * \param temperature Temperature parameter in softmax
 * \param dtype DType of the output in case this can't be inferred. Defaults to the same
 * \return new symbol
 */
inline Symbol log_softmax(const std::string& symbol_name,
                          Symbol data,
                          int axis = -1,
                          dmlc::optional<double> temperature = dmlc::optional<double>(),
                          Log_softmaxDtype dtype = Log_softmaxDtype::kNone) {
  static const char *Log_softmaxDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("log_softmax")
           .SetParam("axis", axis)
           .SetParam("temperature", temperature)
           .SetParam("dtype", Log_softmaxDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Whether to pick convolution algorithm by running performance test.
 */
enum class DeconvolutionCudnnTune {
  kNone = 0,
  kFastest = 1,
  kLimited_workspace = 2,
  kOff = 3
};

/*! \brief Set layout for input, output and weight. Empty for default layout, NCW for 1d,
 */
enum class DeconvolutionLayout {
  kNone = 0,
  kNCDHW = 1,
  kNCHW = 2,
  kNCW = 3,
  kNDHWC = 4,
  kNHWC = 5
};

/*!
 * \brief Computes 1D or 2D transposed convolution (aka fractionally strided convolution)
 *        of the input tensor. This operation can be seen as the gradient of Convolution
 *        operation with respect to its input. Convolution usually reduces the size of
 *        the input. Transposed convolution works the other way, going from a smaller
 * \param symbol_name name of the resulting symbol
 * \param data Input tensor to the deconvolution operation.
 * \param weight Weights representing the kernel.
 * \param bias Bias added to the result after the deconvolution operation.
 * \param kernel Deconvolution kernel size: (w,), (h, w) or (d, h, w). This is same as
 * \param num_filter Number of output filters.
 * \param stride The stride used for the corresponding convolution: (w,), (h, w) or (d,
 * \param dilate Dilation factor for each dimension of the input: (w,), (h, w) or (d, h,
 * \param pad The amount of implicit zero padding added during convolution for each
 *        dimension of the input: (w,), (h, w) or (d, h, w). ``(kernel-1)/2`` is usually
 *        a good choice. If `target_shape` is set, `pad` will be ignored and a padding
 * \param adj Adjustment for output shape: (w,), (h, w) or (d, h, w). If `target_shape`
 * \param target_shape Shape of the output tensor: (w,), (h, w) or (d, h, w).
 * \param num_group Number of groups partition.
 * \param workspace Maximum temporary workspace allowed (MB) in deconvolution.This
 *        parameter has two usages. When CUDNN is not used, it determines the effective
 *        batch size of the deconvolution kernel. When CUDNN is used, it controls the
 *        maximum temporary storage used for tuning the best CUDNN kernel when
 * \param no_bias Whether to disable bias parameter.
 * \param cudnn_tune Whether to pick convolution algorithm by running performance test.
 * \param cudnn_off Turn off cudnn for this layer.
 * \param layout Set layout for input, output and weight. Empty for default layout, NCW
 * \return new symbol
 */
inline Symbol Deconvolution(const std::string& symbol_name,
                            Symbol data,
                            Symbol weight,
                            Symbol bias,
                            Shape kernel,
                            uint32_t num_filter,
                            Shape stride = {},
                            Shape dilate = {},
                            Shape pad = {},
                            Shape adj = {},
                            Shape target_shape = {},
                            uint32_t num_group = 1,
                            uint64_t workspace = 512,
                            bool no_bias = true,
                            DeconvolutionCudnnTune cudnn_tune = DeconvolutionCudnnTune::kNone,
                            bool cudnn_off = false,
                            DeconvolutionLayout layout = DeconvolutionLayout::kNone) {
  static const char *DeconvolutionCudnnTuneValues[] = {
    "None",
    "fastest",
    "limited_workspace",
    "off"
  };
  static const char *DeconvolutionLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC"
  };
  return Operator("Deconvolution")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("adj", adj)
           .SetParam("target_shape", target_shape)
           .SetParam("num_group", num_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("cudnn_tune", DeconvolutionCudnnTuneValues[int(cudnn_tune)])
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("layout", DeconvolutionLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol(symbol_name);
}

/*! \brief upsampling method
 */
enum class UpSamplingSampleType {
  kBilinear = 0,
  kNearest = 1
};

/*! \brief How to handle multiple input. concat means concatenate upsampled images along
 *        the channel dimension. sum means add all images together, only available for
 */
enum class UpSamplingMultiInputMode {
  kConcat = 0,
  kSum = 1
};

/*!
 * \brief Upsamples the given input data.
 *
 *        Two algorithms (``sample_type``) are available for upsampling:
 *
 *        - Nearest Neighbor
 *        - Bilinear
 *
 *        **Nearest Neighbor Upsampling**
 *
 *        Input data is expected to be NCHW.
 *
 *        Example::
 *
 *        x = [[[[1. 1. 1.]
 *        [1. 1. 1.]
 *        [1. 1. 1.]]]]
 *
 *        UpSampling(x, scale=2, sample_type='nearest') = [[[[1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]]]]
 *
 *        **Bilinear Upsampling**
 *
 *        Uses `deconvolution` algorithm under the hood. You need provide both input data
 *
 *        Input data is expected to be NCHW.
 *
 *        `num_filter` is expected to be same as the number of channels.
 *
 *        Example::
 *
 *        x = [[[[1. 1. 1.]
 *        [1. 1. 1.]
 *        [1. 1. 1.]]]]
 *
 *        w = [[[[1. 1. 1. 1.]
 *        [1. 1. 1. 1.]
 *        [1. 1. 1. 1.]
 *        [1. 1. 1. 1.]]]]
 *
 *        UpSampling(x, w, scale=2, sample_type='bilinear', num_filter=1) = [[[[1. 2. 2.
 *        [2. 4. 4. 4. 4. 2.]
 *        [2. 4. 4. 4. 4. 2.]
 *        [2. 4. 4. 4. 4. 2.]
 *        [2. 4. 4. 4. 4. 2.]
 *        [1. 2. 2. 2. 2. 1.]]]]
 *
 *
 *        Defined in src/operator/nn/upsampling.cc:L173
 * \param symbol_name name of the resulting symbol
 * \param data Array of tensors to upsample. For bilinear upsampling, there should be 2
 * \param scale Up sampling scale
 * \param sample_type upsampling method
 * \param num_args Number of inputs to be upsampled. For nearest neighbor upsampling,
 *        this can be 1-N; the size of output will be(scale*h_0,scale*w_0) and all other
 *        inputs will be upsampled to thesame size. For bilinear upsampling this must be
 * \param num_filter Input filter. Only used by bilinear sample_type.Since bilinear
 * \param multi_input_mode How to handle multiple input. concat means concatenate
 *        upsampled images along the channel dimension. sum means add all images
 * \param workspace Tmp workspace for deconvolution (MB)
 * \return new symbol
 */
inline Symbol UpSampling(const std::string& symbol_name,
                         const std::vector<Symbol>& data,
                         int scale,
                         UpSamplingSampleType sample_type,
                         int num_args,
                         int num_filter = 0,
                         UpSamplingMultiInputMode multi_input_mode = UpSamplingMultiInputMode::kConcat,
                         uint64_t workspace = 512) {
  static const char *UpSamplingSampleTypeValues[] = {
    "bilinear",
    "nearest"
  };
  static const char *UpSamplingMultiInputModeValues[] = {
    "concat",
    "sum"
  };
  return Operator("UpSampling")
           .SetParam("scale", scale)
           .SetParam("sample_type", UpSamplingSampleTypeValues[int(sample_type)])
           .SetParam("num_args", num_args)
           .SetParam("num_filter", num_filter)
           .SetParam("multi_input_mode", UpSamplingMultiInputModeValues[int(multi_input_mode)])
           .SetParam("workspace", workspace)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Batch normalization.
 *
 *        Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
 *        well as offset ``beta``.
 *
 *        Assume the input has more than one dimension and we normalize along axis 1.
 *        We first compute the mean and variance along this axis:
 *
 *        .. math::
 *
 *        data\_mean[i] = mean(data[:,i,:,...]) \\
 *        data\_var[i] = var(data[:,i,:,...])
 *
 *        Then compute the normalized output, which has the same shape as input, as
 *
 *        .. math::
 *
 *        out[:,i,:,...] = \frac{data[:,i,:,...] -
 *
 *        Both *mean* and *var* returns a scalar by treating the input as a vector.
 *
 *        Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 *        have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
 *        the inverse of ``data_var``, which are needed for the backward pass. Note that
 *        two outputs are blocked.
 *
 *        Besides the inputs and the outputs, this operator accepts two auxiliary
 *        states, ``moving_mean`` and ``moving_var``, which are *k*-length
 *        vectors. They are global statistics for the whole dataset, which are updated
 *        by::
 *
 *        moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
 *        moving_var = moving_var * momentum + data_var * (1 - momentum)
 *
 *        If ``use_global_stats`` is set to be true, then ``moving_mean`` and
 *        ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
 *        the output. It is often used during inference.
 *
 *        The parameter ``axis`` specifies which axis of the input shape denotes
 *        the 'channel' (separately normalized groups).  The default is 1.  Specifying -1
 *        axis to be the last item in the input shape.
 *
 *        Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
 *        then set ``gamma`` to 1 and its gradient to 0.
 *
 *        .. Note::
 *        When ``fix_gamma`` is set to True, no sparse support is provided. If
 *        the sparse tensors will fallback.
 *
 *
 *
 *        Defined in src/operator/nn/batch_norm.cc:L572
 * \param symbol_name name of the resulting symbol
 * \param data Input data to batch normalization
 * \param gamma gamma array
 * \param beta beta array
 * \param moving_mean running mean of input
 * \param moving_var running variance of input
 * \param eps Epsilon to prevent div 0. Must be no less than CUDNN_BN_MIN_EPSILON defined
 * \param momentum Momentum for moving average
 * \param fix_gamma Fix gamma while training
 * \param use_global_stats Whether use global moving statistics instead of local
 * \param output_mean_var Output the mean and inverse std
 * \param axis Specify which shape axis the channel is specified
 * \param cudnn_off Do not select CUDNN operator, if available
 * \return new symbol
 */
inline Symbol BatchNorm(const std::string& symbol_name,
                        Symbol data,
                        Symbol gamma,
                        Symbol beta,
                        Symbol moving_mean,
                        Symbol moving_var,
                        double eps = 0.0010000000474974513,
                        mx_float momentum = 0.899999976,
                        bool fix_gamma = true,
                        bool use_global_stats = false,
                        bool output_mean_var = false,
                        int axis = 1,
                        bool cudnn_off = false) {
  return Operator("BatchNorm")
           .SetParam("eps", eps)
           .SetParam("momentum", momentum)
           .SetParam("fix_gamma", fix_gamma)
           .SetParam("use_global_stats", use_global_stats)
           .SetParam("output_mean_var", output_mean_var)
           .SetParam("axis", axis)
           .SetParam("cudnn_off", cudnn_off)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .SetInput("beta", beta)
           .SetInput("moving_mean", moving_mean)
           .SetInput("moving_var", moving_var)
           .CreateSymbol(symbol_name);
}

/*! \brief Set the label that is reserved for blank label.If "first", 0-th label is
 *        reserved, and label values for tokens in the vocabulary are between ``1`` and
 *        ``alphabet_size-1``, and the padding mask is ``-1``. If "last", last label
 *        value ``alphabet_size-1`` is reserved for blank label instead, and label values
 *        for tokens in the vocabulary are between ``0`` and ``alphabet_size-2``, and the
 */
enum class CTCLossBlankLabel {
  kFirst = 0,
  kLast = 1
};

/*!
 * \brief Connectionist Temporal Classification Loss.
 *
 *        .. note:: The existing alias ``contrib_CTCLoss`` is deprecated.
 *
 *        The shapes of the inputs and outputs:
 *
 *        - **data**: `(sequence_length, batch_size, alphabet_size)`
 *        - **label**: `(batch_size, label_sequence_length)`
 *        - **out**: `(batch_size)`
 *
 *        The `data` tensor consists of sequences of activation vectors (without applying
 *        with i-th channel in the last dimension corresponding to i-th label
 *        for i between 0 and alphabet_size-1 (i.e always 0-indexed).
 *        Alphabet size should include one additional value reserved for blank label.
 *        When `blank_label` is ``"first"``, the ``0``-th channel is be reserved for
 *        activation of blank label, or otherwise if it is "last",
 *        reserved for blank label.
 *
 *        ``label`` is an index matrix of integers. When `blank_label` is ``"first"``,
 *        the value 0 is then reserved for blank label, and should not be passed in this
 *        when `blank_label` is ``"last"``, the value `(alphabet_size-1)` is reserved for
 *
 *        If a sequence of labels is shorter than *label_sequence_length*, use the special
 *        padding value at the end of the sequence to conform it to the correct
 *        length. The padding value is `0` when `blank_label` is ``"first"``, and `-1`
 *
 *        For example, suppose the vocabulary is `[a, b, c]`, and in one batch we have
 *        'ba', 'cbb', and 'abac'. When `blank_label` is ``"first"``, we can index the
 *        `{'a': 1, 'b': 2, 'c': 3}`, and we reserve the 0-th channel for blank label in
 *        The resulting `label` tensor should be padded to be::
 *
 *        [[2, 1, 0, 0], [3, 2, 2, 0], [1, 2, 1, 3]]
 *
 *        When `blank_label` is ``"last"``, we can index the labels as
 *        `{'a': 0, 'b': 1, 'c': 2}`, and we reserve the channel index 3 for blank label
 *        The resulting `label` tensor should be padded to be::
 *
 *        [[1, 0, -1, -1], [2, 1, 1, -1], [0, 1, 0, 2]]
 *
 *        ``out`` is a list of CTC loss values, one per example in the batch.
 *
 *        See *Connectionist Temporal Classification: Labelling Unsegmented
 *        Sequence Data with Recurrent Neural Networks*, A. Graves *et al*. for more
 *        information on the definition and the algorithm.
 *
 *
 *
 *        Defined in src/operator/nn/ctc_loss.cc:L100
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param label Ground-truth labels for the loss.
 * \param data_lengths Lengths of data for each of the samples. Only required when
 * \param label_lengths Lengths of labels for each of the samples. Only required when
 * \param use_data_lengths Whether the data lenghts are decided by `data_lengths`. If
 * \param use_label_lengths Whether the label lenghts are decided by `label_lengths`, or
 *        derived from `padding_mask`. If false, the lengths are derived from the first
 *        occurrence of the value of `padding_mask`. The value of `padding_mask` is ``0``
 *        when first CTC label is reserved for blank, and ``-1`` when last label is
 * \param blank_label Set the label that is reserved for blank label.If "first", 0-th
 *        label is reserved, and label values for tokens in the vocabulary are between
 *        ``1`` and ``alphabet_size-1``, and the padding mask is ``-1``. If "last", last
 *        label value ``alphabet_size-1`` is reserved for blank label instead, and label
 *        values for tokens in the vocabulary are between ``0`` and ``alphabet_size-2``,
 * \return new symbol
 */
inline Symbol CTCLoss(const std::string& symbol_name,
                      Symbol data,
                      Symbol label,
                      Symbol data_lengths,
                      Symbol label_lengths,
                      bool use_data_lengths = false,
                      bool use_label_lengths = false,
                      CTCLossBlankLabel blank_label = CTCLossBlankLabel::kFirst) {
  static const char *CTCLossBlankLabelValues[] = {
    "first",
    "last"
  };
  return Operator("CTCLoss")
           .SetParam("use_data_lengths", use_data_lengths)
           .SetParam("use_label_lengths", use_label_lengths)
           .SetParam("blank_label", CTCLossBlankLabelValues[int(blank_label)])
           .SetInput("data", data)
           .SetInput("label", label)
           .SetInput("data_lengths", data_lengths)
           .SetInput("label_lengths", label_lengths)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Applies local response normalization to the input.
 *
 *        The local response normalization layer performs "lateral inhibition" by
 *        over local input regions.
 *
 *        If :math:`a_{x,y}^{i}` is the activity of a neuron computed by applying kernel
 *        :math:`(x, y)` and then applying the ReLU nonlinearity, the response-normalized
 *        activity :math:`b_{x,y}^{i}` is given by the expression:
 *
 *        .. math::
 *        b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \frac{\alpha}{n} \sum_{j=max(0,
 *
 *        where the sum runs over :math:`n` "adjacent" kernel maps at the same spatial
 *        number of kernels in the layer.
 *
 *
 *
 *        Defined in src/operator/nn/lrn.cc:L164
 * \param symbol_name name of the resulting symbol
 * \param data Input data to LRN
 * \param nsize normalization window width in elements.
 * \param alpha The variance scaling parameter :math:`lpha` in the LRN expression.
 * \param beta The power parameter :math:`eta` in the LRN expression.
 * \param knorm The parameter :math:`k` in the LRN expression.
 * \return new symbol
 */
inline Symbol LRN(const std::string& symbol_name,
                  Symbol data,
                  uint32_t nsize,
                  mx_float alpha = 9.99999975e-05,
                  mx_float beta = 0.75,
                  mx_float knorm = 2) {
  return Operator("LRN")
           .SetParam("nsize", nsize)
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetParam("knorm", knorm)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Layer normalization.
 *
 *        Normalizes the channels of the input tensor by mean and variance, and applies a
 *        well as offset ``beta``.
 *
 *        Assume the input has more than one dimension and we normalize along axis 1.
 *        We first compute the mean and variance along this axis and then
 *        compute the normalized output, which has the same shape as input, as following:
 *
 *        .. math::
 *
 *        out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis) + \epsilon}} * gamma
 *
 *        Both ``gamma`` and ``beta`` are learnable parameters.
 *
 *        Unlike BatchNorm and InstanceNorm,  the *mean* and *var* are computed along the
 *
 *        Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 *        have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
 *        ``data_std``. Note that no gradient will be passed through these two outputs.
 *
 *        The parameter ``axis`` specifies which axis of the input shape denotes
 *        the 'channel' (separately normalized groups).  The default is -1, which sets
 *        axis to be the last item in the input shape.
 *
 *
 *
 *        Defined in src/operator/nn/layer_norm.cc:L155
 * \param symbol_name name of the resulting symbol
 * \param data Input data to layer normalization
 * \param gamma gamma array
 * \param beta beta array
 * \param axis The axis to perform layer normalization. Usually, this should be be axis
 * \param eps An `epsilon` parameter to prevent division by 0.
 * \param output_mean_var Output the mean and std calculated along the given axis.
 * \return new symbol
 */
inline Symbol LayerNorm(const std::string& symbol_name,
                        Symbol data,
                        Symbol gamma,
                        Symbol beta,
                        int axis = -1,
                        mx_float eps = 9.99999975e-06,
                        bool output_mean_var = false) {
  return Operator("LayerNorm")
           .SetParam("axis", axis)
           .SetParam("eps", eps)
           .SetParam("output_mean_var", output_mean_var)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .SetInput("beta", beta)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data List of arrays to concatenate
 * \param num_args Number of inputs to be concated.
 * \param dim the dimension to be concated.
 * \return new symbol
 */
inline Symbol _rnn_param_concat(const std::string& symbol_name,
                                const std::vector<Symbol>& data,
                                int num_args,
                                int dim = 1) {
  return Operator("_rnn_param_concat")
           .SetParam("num_args", num_args)
           .SetParam("dim", dim)
(data)
           .CreateSymbol(symbol_name);
}

/*! \brief Whether to only turn on dropout during training or to also turn on for
 */
enum class DropoutMode {
  kAlways = 0,
  kTraining = 1
};

/*!
 * \brief Applies dropout operation to input array.
 *
 *        - During training, each element of the input is set to zero with probability p.
 *        The whole array is rescaled by :math:`1/(1-p)` to keep the expected
 *        sum of the input unchanged.
 *
 *        - During testing, this operator does not change the input if mode is 'training'.
 *        If mode is 'always', the same computaion as during training will be applied.
 *
 *        Example::
 *
 *        random.seed(998)
 *        input_array = array([[3., 0.5,  -0.5,  2., 7.],
 *        [2., -0.4,   7.,  3., 0.2]])
 *        a = symbol.Variable('a')
 *        dropout = symbol.Dropout(a, p = 0.2)
 *        executor = dropout.simple_bind(a = input_array.shape)
 *
 *        ## If training
 *        executor.forward(is_train = True, a = input_array)
 *        executor.outputs
 *        [[ 3.75   0.625 -0.     2.5    8.75 ]
 *        [ 2.5   -0.5    8.75   3.75   0.   ]]
 *
 *        ## If testing
 *        executor.forward(is_train = False, a = input_array)
 *        executor.outputs
 *        [[ 3.     0.5   -0.5    2.     7.   ]
 *        [ 2.    -0.4    7.     3.     0.2  ]]
 *
 *
 *        Defined in src/operator/nn/dropout.cc:L97
 * \param symbol_name name of the resulting symbol
 * \param data Input array to which dropout will be applied.
 * \param p Fraction of the input that gets dropped out during training time.
 * \param mode Whether to only turn on dropout during training or to also turn on for
 * \param axes Axes for variational dropout kernel.
 * \param cudnn_off Whether to turn off cudnn in dropout operator. This option is ignored
 * \return new symbol
 */
inline Symbol Dropout(const std::string& symbol_name,
                      Symbol data,
                      mx_float p = 0.5,
                      DropoutMode mode = DropoutMode::kTraining,
                      Shape axes = {},
                      dmlc::optional<bool> cudnn_off = dmlc::optional<bool>(0)) {
  static const char *DropoutModeValues[] = {
    "always",
    "training"
  };
  return Operator("Dropout")
           .SetParam("p", p)
           .SetParam("mode", DropoutModeValues[int(mode)])
           .SetParam("axes", axes)
           .SetParam("cudnn_off", cudnn_off)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Specifies how to compute the softmax. If set to ``instance``, it computes
 *        softmax for each instance. If set to ``channel``, It computes cross channel
 */
enum class SoftmaxActivationMode {
  kChannel = 0,
  kInstance = 1
};

/*!
 * \brief Applies softmax activation to input. This is intended for internal layers.
 *
 *        .. note::
 *
 *        This operator has been deprecated, please use `softmax`.
 *
 *        If `mode` = ``instance``, this operator will compute a softmax for each
 *        This is the default mode.
 *
 *        If `mode` = ``channel``, this operator will compute a k-class softmax at each
 *        of each instance, where `k` = ``num_channel``. This mode can only be used when
 *        has at least 3 dimensions.
 *        This can be used for `fully convolutional network`, `image segmentation`, etc.
 *
 *        Example::
 *
 *        >>> input_array = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
 *        >>>                            [2., -.4, 7.,   3., 0.2]])
 *        >>> softmax_act = mx.nd.SoftmaxActivation(input_array)
 *        >>> print softmax_act.asnumpy()
 *        [[  1.78322066e-02   1.46375655e-03   5.38485940e-04   6.56010211e-03
 *        [  6.56221947e-03   5.95310994e-04   9.73919690e-01   1.78379621e-02
 *
 *
 *
 *        Defined in src/operator/nn/softmax_activation.cc:L59
 * \param symbol_name name of the resulting symbol
 * \param data The input array.
 * \param mode Specifies how to compute the softmax. If set to ``instance``, it computes
 *        softmax for each instance. If set to ``channel``, It computes cross channel
 * \return new symbol
 */
inline Symbol SoftmaxActivation(const std::string& symbol_name,
                                Symbol data,
                                SoftmaxActivationMode mode = SoftmaxActivationMode::kInstance) {
  static const char *SoftmaxActivationModeValues[] = {
    "channel",
    "instance"
  };
  return Operator("SoftmaxActivation")
           .SetParam("mode", SoftmaxActivationModeValues[int(mode)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Crop an image NDArray of shape (H x W x C) or (N x H x W x C)
 *        to the given size.
 *        Example:
 *        .. code-block:: python
 *        image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
 *        mx.nd.image.crop(image, 1, 1, 2, 2)
 *        [[[144  34   4]
 *        [ 82 157  38]]
 *
 *        [[156 111 230]
 *        [177  25  15]]]
 *        <NDArray 2x2x3 @cpu(0)>
 *        image = mx.nd.random.uniform(0, 255, (2, 4, 2, 3)).astype(dtype=np.uint8)
 *        mx.nd.image.crop(image, 1, 1, 2, 2)
 *        [[[[ 35 198  50]
 *        [242  94 168]]
 *
 *        [[223 119 129]
 *        [249  14 154]]]
 *
 *
 *        [[[137 215 106]
 *        [ 79 174 133]]
 *
 *        [[116 142 109]
 *        [ 35 239  50]]]]
 *        <NDArray 2x2x2x3 @cpu(0)>
 *
 *
 *        Defined in src/operator/image/crop.cc:L65
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param x Left boundary of the cropping area.
 * \param y Top boundary of the cropping area.
 * \param width Width of the cropping area.
 * \param height Height of the cropping area.
 * \return new symbol
 */
inline Symbol _image_crop(const std::string& symbol_name,
                          Symbol data,
                          int x,
                          int y,
                          int width,
                          int height) {
  return Operator("_image_crop")
           .SetParam("x", x)
           .SetParam("y", y)
           .SetParam("width", width)
           .SetParam("height", height)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Converts an image NDArray of shape (H x W x C) or (N x H x W x C)
 *        with values in the range [0, 255] to a tensor NDArray of shape (C x H x W) or
 *        with values in the range [0, 1]
 *
 *        Example:
 *        .. code-block:: python
 *        image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
 *        to_tensor(image)
 *        [[[ 0.85490197  0.72156864]
 *        [ 0.09019608  0.74117649]
 *        [ 0.61960787  0.92941177]
 *        [ 0.96470588  0.1882353 ]]
 *        [[ 0.6156863   0.73725492]
 *        [ 0.46666667  0.98039216]
 *        [ 0.44705883  0.45490196]
 *        [ 0.01960784  0.8509804 ]]
 *        [[ 0.39607844  0.03137255]
 *        [ 0.72156864  0.52941179]
 *        [ 0.16470589  0.7647059 ]
 *        [ 0.05490196  0.70588237]]]
 *        <NDArray 3x4x2 @cpu(0)>
 *
 *        image = mx.nd.random.uniform(0, 255, (2, 4, 2, 3)).astype(dtype=np.uint8)
 *        to_tensor(image)
 *        [[[[0.11764706 0.5803922 ]
 *        [0.9411765  0.10588235]
 *        [0.2627451  0.73333335]
 *        [0.5647059  0.32156864]]
 *        [[0.7176471  0.14117648]
 *        [0.75686276 0.4117647 ]
 *        [0.18431373 0.45490196]
 *        [0.13333334 0.6156863 ]]
 *        [[0.6392157  0.5372549 ]
 *        [0.52156866 0.47058824]
 *        [0.77254903 0.21568628]
 *        [0.01568628 0.14901961]]]
 *        [[[0.6117647  0.38431373]
 *        [0.6784314  0.6117647 ]
 *        [0.69411767 0.96862745]
 *        [0.67058825 0.35686275]]
 *        [[0.21960784 0.9411765 ]
 *        [0.44705883 0.43529412]
 *        [0.09803922 0.6666667 ]
 *        [0.16862746 0.1254902 ]]
 *        [[0.6156863  0.9019608 ]
 *        [0.35686275 0.9019608 ]
 *        [0.05882353 0.6509804 ]
 *        [0.20784314 0.7490196 ]]]]
 *        <NDArray 2x3x4x2 @cpu(0)>
 *
 *
 *        Defined in src/operator/image/image_random.cc:L91
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \return new symbol
 */
inline Symbol _image_to_tensor(const std::string& symbol_name,
                               Symbol data) {
  return Operator("_image_to_tensor")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Normalize an tensor of shape (C x H x W) or (N x C x H x W) with mean and
 *        standard deviation.
 *
 *        Given mean `(m1, ..., mn)` and std `(s\ :sub:`1`\ , ..., s\ :sub:`n`)` for `n`
 *        this transform normalizes each channel of the input tensor with:
 *
 *        .. math::
 *
 *        output[i] = (input[i] - m\ :sub:`i`\ ) / s\ :sub:`i`
 *
 *        If mean or std is scalar, the same value will be applied to all channels.
 *
 *        Default value for mean is 0.0 and stand deviation is 1.0.
 *
 *        Example:
 *
 *        .. code-block:: python
 *        image = mx.nd.random.uniform(0, 1, (3, 4, 2))
 *        normalize(image, mean=(0, 1, 2), std=(3, 2, 1))
 *        [[[ 0.18293785  0.19761486]
 *        [ 0.23839645  0.28142193]
 *        [ 0.20092112  0.28598186]
 *        [ 0.18162774  0.28241724]]
 *        [[-0.2881726  -0.18821815]
 *        [-0.17705294 -0.30780914]
 *        [-0.2812064  -0.3512327 ]
 *        [-0.05411351 -0.4716435 ]]
 *        [[-1.0363373  -1.7273437 ]
 *        [-1.6165586  -1.5223348 ]
 *        [-1.208275   -1.1878313 ]
 *        [-1.4711051  -1.5200229 ]]]
 *        <NDArray 3x4x2 @cpu(0)>
 *
 *        image = mx.nd.random.uniform(0, 1, (2, 3, 4, 2))
 *        normalize(image, mean=(0, 1, 2), std=(3, 2, 1))
 *        [[[[ 0.18934818  0.13092826]
 *        [ 0.3085322   0.27869293]
 *        [ 0.02367868  0.11246539]
 *        [ 0.0290431   0.2160573 ]]
 *        [[-0.4898908  -0.31587923]
 *        [-0.08369008 -0.02142242]
 *        [-0.11092162 -0.42982462]
 *        [-0.06499392 -0.06495637]]
 *        [[-1.0213816  -1.526392  ]
 *        [-1.2008414  -1.1990893 ]
 *        [-1.5385206  -1.4795225 ]
 *        [-1.2194707  -1.3211205 ]]]
 *        [[[ 0.03942481  0.24021089]
 *        [ 0.21330701  0.1940066 ]
 *        [ 0.04778443  0.17912441]
 *        [ 0.31488964  0.25287187]]
 *        [[-0.23907584 -0.4470462 ]
 *        [-0.29266903 -0.2631998 ]
 *        [-0.3677222  -0.40683383]
 *        [-0.11288315 -0.13154092]]
 *        [[-1.5438497  -1.7834496 ]
 *        [-1.431566   -1.8647819 ]
 *        [-1.9812102  -1.675859  ]
 *        [-1.3823645  -1.8503251 ]]]]
 *        <NDArray 2x3x4x2 @cpu(0)>
 *
 *
 *        Defined in src/operator/image/image_random.cc:L165
 * \param symbol_name name of the resulting symbol
 * \param data Input ndarray
 * \param mean Sequence of means for each channel. Default value is 0.
 * \param std Sequence of standard deviations for each channel. Default value is 1.
 * \return new symbol
 */
inline Symbol _image_normalize(const std::string& symbol_name,
                               Symbol data,
                               nnvm::Tuple<mx_float> mean = {0,0,0,0},
                               nnvm::Tuple<mx_float> std = {1,1,1,1}) {
  return Operator("_image_normalize")
           .SetParam("mean", mean)
           .SetParam("std", std)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L192
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \return new symbol
 */
inline Symbol _image_flip_left_right(const std::string& symbol_name,
                                     Symbol data) {
  return Operator("_image_flip_left_right")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L196
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \return new symbol
 */
inline Symbol _image_random_flip_left_right(const std::string& symbol_name,
                                            Symbol data) {
  return Operator("_image_random_flip_left_right")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L200
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \return new symbol
 */
inline Symbol _image_flip_top_bottom(const std::string& symbol_name,
                                     Symbol data) {
  return Operator("_image_flip_top_bottom")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L204
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \return new symbol
 */
inline Symbol _image_random_flip_top_bottom(const std::string& symbol_name,
                                            Symbol data) {
  return Operator("_image_random_flip_top_bottom")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L208
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param min_factor Minimum factor.
 * \param max_factor Maximum factor.
 * \return new symbol
 */
inline Symbol _image_random_brightness(const std::string& symbol_name,
                                       Symbol data,
                                       mx_float min_factor,
                                       mx_float max_factor) {
  return Operator("_image_random_brightness")
           .SetParam("min_factor", min_factor)
           .SetParam("max_factor", max_factor)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L214
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param min_factor Minimum factor.
 * \param max_factor Maximum factor.
 * \return new symbol
 */
inline Symbol _image_random_contrast(const std::string& symbol_name,
                                     Symbol data,
                                     mx_float min_factor,
                                     mx_float max_factor) {
  return Operator("_image_random_contrast")
           .SetParam("min_factor", min_factor)
           .SetParam("max_factor", max_factor)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L221
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param min_factor Minimum factor.
 * \param max_factor Maximum factor.
 * \return new symbol
 */
inline Symbol _image_random_saturation(const std::string& symbol_name,
                                       Symbol data,
                                       mx_float min_factor,
                                       mx_float max_factor) {
  return Operator("_image_random_saturation")
           .SetParam("min_factor", min_factor)
           .SetParam("max_factor", max_factor)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L228
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param min_factor Minimum factor.
 * \param max_factor Maximum factor.
 * \return new symbol
 */
inline Symbol _image_random_hue(const std::string& symbol_name,
                                Symbol data,
                                mx_float min_factor,
                                mx_float max_factor) {
  return Operator("_image_random_hue")
           .SetParam("min_factor", min_factor)
           .SetParam("max_factor", max_factor)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        Defined in src/operator/image/image_random.cc:L235
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param brightness How much to jitter brightness.
 * \param contrast How much to jitter contrast.
 * \param saturation How much to jitter saturation.
 * \param hue How much to jitter hue.
 * \return new symbol
 */
inline Symbol _image_random_color_jitter(const std::string& symbol_name,
                                         Symbol data,
                                         mx_float brightness,
                                         mx_float contrast,
                                         mx_float saturation,
                                         mx_float hue) {
  return Operator("_image_random_color_jitter")
           .SetParam("brightness", brightness)
           .SetParam("contrast", contrast)
           .SetParam("saturation", saturation)
           .SetParam("hue", hue)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Adjust the lighting level of the input. Follow the AlexNet style.
 *
 *        Defined in src/operator/image/image_random.cc:L242
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param alpha The lighting alphas for the R, G, B channels.
 * \return new symbol
 */
inline Symbol _image_adjust_lighting(const std::string& symbol_name,
                                     Symbol data,
                                     nnvm::Tuple<mx_float> alpha) {
  return Operator("_image_adjust_lighting")
           .SetParam("alpha", alpha)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Randomly add PCA noise. Follow the AlexNet style.
 *
 *        Defined in src/operator/image/image_random.cc:L249
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param alpha_std Level of the lighting noise.
 * \return new symbol
 */
inline Symbol _image_random_lighting(const std::string& symbol_name,
                                     Symbol data,
                                     mx_float alpha_std = 0.0500000007) {
  return Operator("_image_random_lighting")
           .SetParam("alpha_std", alpha_std)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Resize an image NDArray of shape (H x W x C) or (N x H x W x C)
 *        to the given size
 *        Example:
 *        .. code-block:: python
 *        image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
 *        mx.nd.image.resize(image, (3, 3))
 *        [[[124 111 197]
 *        [158  80 155]
 *        [193  50 112]]
 *
 *        [[110 100 113]
 *        [134 165 148]
 *        [157 231 182]]
 *
 *        [[202 176 134]
 *        [174 191 149]
 *        [147 207 164]]]
 *        <NDArray 3x3x3 @cpu(0)>
 *        image = mx.nd.random.uniform(0, 255, (2, 4, 2, 3)).astype(dtype=np.uint8)
 *        mx.nd.image.resize(image, (2, 2))
 *        [[[[ 59 133  80]
 *        [187 114 153]]
 *
 *        [[ 38 142  39]
 *        [207 131 124]]]
 *
 *
 *        [[[117 125 136]
 *        [191 166 150]]
 *
 *        [[129  63 113]
 *        [182 109  48]]]]
 *        <NDArray 2x2x2x3 @cpu(0)>
 *
 *
 *        Defined in src/operator/image/resize.cc:L70
 * \param symbol_name name of the resulting symbol
 * \param data The input.
 * \param size Size of new image. Could be (width, height) or (size)
 * \param keep_ratio Whether to resize the short edge or both edges to `size`, if size is
 * \param interp Interpolation method for resizing. By default uses bilinear
 *        interpolationOptions are INTER_NEAREST - a nearest-neighbor
 *        interpolationINTER_LINEAR - a bilinear interpolationINTER_AREA - resampling
 *        using pixel area relationINTER_CUBIC - a bicubic interpolation over 4x4 pixel
 *        neighborhoodINTER_LANCZOS4 - a Lanczos interpolation over 8x8 pixel
 *        neighborhoodNote that the GPU version only support bilinear interpolation(1)
 *        and the result on cpu would be slightly different from gpu.It uses opencv
 *        resize function which tend to align center on cpuwhile using
 * \return new symbol
 */
inline Symbol _image_resize(const std::string& symbol_name,
                            Symbol data,
                            Shape size = {},
                            bool keep_ratio = false,
                            int interp = 1) {
  return Operator("_image_resize")
           .SetParam("size", size)
           .SetParam("keep_ratio", keep_ratio)
           .SetParam("interp", interp)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Decode image with OpenCV.
 *        Note: return image in RGB by default, instead of OpenCV's default BGR.
 * \param symbol_name name of the resulting symbol
 * \param buf Buffer containing binary encoded image
 * \param flag Convert decoded image to grayscale (0) or color (1).
 * \param to_rgb Whether to convert decoded image to mxnet's default RGB format (instead
 * \return new symbol
 */
inline Symbol _cvimdecode(const std::string& symbol_name,
                          Symbol buf,
                          int flag = 1,
                          bool to_rgb = true) {
  return Operator("_cvimdecode")
           .SetParam("flag", flag)
           .SetParam("to_rgb", to_rgb)
           .SetInput("buf", buf)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Read and decode image with OpenCV.
 *        Note: return image in RGB by default, instead of OpenCV's default BGR.
 * \param symbol_name name of the resulting symbol
 * \param filename Name of the image file to be loaded.
 * \param flag Convert decoded image to grayscale (0) or color (1).
 * \param to_rgb Whether to convert decoded image to mxnet's default RGB format (instead
 * \return new symbol
 */
inline Symbol _cvimread(const std::string& symbol_name,
                        const std::string& filename,
                        int flag = 1,
                        bool to_rgb = true) {
  return Operator("_cvimread")
           .SetParam("flag", flag)
           .SetParam("to_rgb", to_rgb)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Resize image with OpenCV.
 *
 * \param symbol_name name of the resulting symbol
 * \param src source image
 * \param w Width of resized image.
 * \param h Height of resized image.
 * \param interp Interpolation method (default=cv2.INTER_LINEAR).
 * \return new symbol
 */
inline Symbol _cvimresize(const std::string& symbol_name,
                          Symbol src,
                          int w,
                          int h,
                          int interp = 1) {
  return Operator("_cvimresize")
           .SetParam("w", w)
           .SetParam("h", h)
           .SetParam("interp", interp)
           .SetInput("src", src)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Pad image border with OpenCV.
 *
 * \param symbol_name name of the resulting symbol
 * \param src source image
 * \param top Top margin.
 * \param bot Bottom margin.
 * \param left Left margin.
 * \param right Right margin.
 * \param type Filling type (default=cv2.BORDER_CONSTANT).
 * \param value (Deprecated! Use ``values`` instead.) Fill with single value.
 * \param values Fill with value(RGB[A] or gray), up to 4 channels.
 * \return new symbol
 */
inline Symbol _cvcopyMakeBorder(const std::string& symbol_name,
                                Symbol src,
                                int top,
                                int bot,
                                int left,
                                int right,
                                int type = 0,
                                double value = 0,
                                nnvm::Tuple<double> values = {}) {
  return Operator("_cvcopyMakeBorder")
           .SetParam("top", top)
           .SetParam("bot", bot)
           .SetParam("left", left)
           .SetParam("right", right)
           .SetParam("type", type)
           .SetParam("value", value)
           .SetParam("values", values)
           .SetInput("src", src)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Place holder for variable who cannot perform gradient
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _NoGradient(const std::string& symbol_name) {
  return Operator("_NoGradient")
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data input data list
 * \return new symbol
 */
inline Symbol _CachedOp(const std::string& symbol_name,
                        const std::vector<Symbol>& data) {
  return Operator("_CachedOp")
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param data input data
 * \return new symbol
 */
inline Symbol _copyto(const std::string& symbol_name,
                      Symbol data) {
  return Operator("_copyto")
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief the type of RNN to compute
 */
enum class RNNMode {
  kGru = 0,
  kLstm = 1,
  kRnn_relu = 2,
  kRnn_tanh = 3
};

/*!
 * \brief Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are
 *        implemented, with both multi-layer and bidirectional support.
 *
 *        When the input data is of type float32 and the environment variables
 *        and MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION are set to 1, this operator will
 *        pseudo-float16 precision (float32 math with float16 I/O) precision in order to
 *        Tensor Cores on suitable NVIDIA GPUs. This can sometimes give significant
 *
 *        **Vanilla RNN**
 *
 *        Applies a single-gate recurrent layer to input X. Two kinds of activation
 *        ReLU and Tanh.
 *
 *        With ReLU activation function:
 *
 *        .. math::
 *        h_t = relu(W_{ih} * x_t + b_{ih}  +  W_{hh} * h_{(t-1)} + b_{hh})
 *
 *        With Tanh activtion function:
 *
 *        .. math::
 *        h_t = \tanh(W_{ih} * x_t + b_{ih}  +  W_{hh} * h_{(t-1)} + b_{hh})
 *
 *        Reference paper: Finding structure in time - Elman, 1988.
 *        https://crl.ucsd.edu/~elman/Papers/fsit.pdf
 *
 *        **LSTM**
 *
 *        Long Short-Term Memory - Hochreiter, 1997.
 *
 *        .. math::
 *        \begin{array}{ll}
 *        i_t = \mathrm{sigmoid}(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
 *        f_t = \mathrm{sigmoid}(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
 *        g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hc} h_{(t-1)} + b_{hg}) \\
 *        o_t = \mathrm{sigmoid}(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
 *        c_t = f_t * c_{(t-1)} + i_t * g_t \\
 *        h_t = o_t * \tanh(c_t)
 *        \end{array}
 *
 *        **GRU**
 *
 *        Gated Recurrent Unit - Cho et al. 2014. http://arxiv.org/abs/1406.1078
 *
 *        The definition of GRU here is slightly different from paper but compatible with
 *
 *        .. math::
 *        \begin{array}{ll}
 *        r_t = \mathrm{sigmoid}(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
 *        z_t = \mathrm{sigmoid}(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\
 *        n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\
 *        h_t = (1 - z_t) * n_t + z_t * h_{(t-1)} \\
 *        \end{array}
 *
 *
 *        Defined in src/operator/rnn.cc:L690
 * \param symbol_name name of the resulting symbol
 * \param data Input data to RNN
 * \param parameters Vector of all RNN trainable parameters concatenated
 * \param state initial hidden state of the RNN
 * \param state_cell initial cell state for LSTM networks (only for LSTM)
 * \param sequence_length Vector of valid sequence lengths for each element in batch.
 * \param state_size size of the state for each layer
 * \param num_layers number of stacked layers
 * \param mode the type of RNN to compute
 * \param bidirectional whether to use bidirectional recurrent layers
 * \param p drop rate of the dropout on the outputs of each RNN layer, except the last
 * \param state_outputs Whether to have the states as symbol outputs.
 * \param projection_size size of project size
 * \param lstm_state_clip_min Minimum clip value of LSTM states. This option must be used
 * \param lstm_state_clip_max Maximum clip value of LSTM states. This option must be used
 * \param lstm_state_clip_nan Whether to stop NaN from propagating in state by clipping
 * \param use_sequence_length If set to true, this layer takes in an extra input
 * \return new symbol
 */
inline Symbol RNN(const std::string& symbol_name,
                  Symbol data,
                  Symbol parameters,
                  Symbol state,
                  Symbol state_cell,
                  Symbol sequence_length,
                  uint32_t state_size,
                  uint32_t num_layers,
                  RNNMode mode,
                  bool bidirectional = false,
                  mx_float p = 0,
                  bool state_outputs = false,
                  dmlc::optional<int> projection_size = dmlc::optional<int>(),
                  dmlc::optional<double> lstm_state_clip_min = dmlc::optional<double>(),
                  dmlc::optional<double> lstm_state_clip_max = dmlc::optional<double>(),
                  bool lstm_state_clip_nan = false,
                  bool use_sequence_length = false) {
  static const char *RNNModeValues[] = {
    "gru",
    "lstm",
    "rnn_relu",
    "rnn_tanh"
  };
  return Operator("RNN")
           .SetParam("state_size", state_size)
           .SetParam("num_layers", num_layers)
           .SetParam("mode", RNNModeValues[int(mode)])
           .SetParam("bidirectional", bidirectional)
           .SetParam("p", p)
           .SetParam("state_outputs", state_outputs)
           .SetParam("projection_size", projection_size)
           .SetParam("lstm_state_clip_min", lstm_state_clip_min)
           .SetParam("lstm_state_clip_max", lstm_state_clip_max)
           .SetParam("lstm_state_clip_nan", lstm_state_clip_nan)
           .SetParam("use_sequence_length", use_sequence_length)
           .SetInput("data", data)
           .SetInput("parameters", parameters)
           .SetInput("state", state)
           .SetInput("state_cell", state_cell)
           .SetInput("sequence_length", sequence_length)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for SignSGD optimizer.
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        W_t = W_{t-1} - \eta_t \text{sign}(g_t)
 *
 *        It updates the weights using::
 *
 *        weight = weight - learning_rate * sign(gradient)
 *
 *        .. note::
 *        - sparse ndarray not supported for this optimizer yet.
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L61
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param lr Learning rate
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol signsgd_update(const std::string& symbol_name,
                             Symbol weight,
                             Symbol grad,
                             mx_float lr,
                             mx_float wd = 0,
                             mx_float rescale_grad = 1,
                             mx_float clip_gradient = -1) {
  return Operator("signsgd_update")
           .SetParam("lr", lr)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief SIGN momentUM (Signum) optimizer.
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        m_t = \beta m_{t-1} + (1 - \beta) g_t\\
 *        W_t = W_{t-1} - \eta_t \text{sign}(m_t)
 *
 *        It updates the weights using::
 *        state = momentum * state + (1-momentum) * gradient
 *        weight = weight - learning_rate * sign(state)
 *
 *        Where the parameter ``momentum`` is the decay rate of momentum estimates at
 *
 *        .. note::
 *        - sparse ndarray not supported for this optimizer yet.
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L90
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mom Momentum
 * \param lr Learning rate
 * \param momentum The decay rate of momentum estimates at each epoch.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param wd_lh The amount of weight decay that does not go into gradient/momentum
 * \return new symbol
 */
inline Symbol signum_update(const std::string& symbol_name,
                            Symbol weight,
                            Symbol grad,
                            Symbol mom,
                            mx_float lr,
                            mx_float momentum = 0,
                            mx_float wd = 0,
                            mx_float rescale_grad = 1,
                            mx_float clip_gradient = -1,
                            mx_float wd_lh = 0) {
  return Operator("signum_update")
           .SetParam("lr", lr)
           .SetParam("momentum", momentum)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("wd_lh", wd_lh)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mom", mom)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for Stochastic Gradient Descent (SDG) optimizer.
 *
 *        It updates the weights using::
 *
 *        weight = weight - learning_rate * (gradient + wd * weight)
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L327
 * \param symbol_name name of the resulting symbol
 * \param data Weights
 * \param lrs Learning rates.
 * \param wds Weight decay augments the objective function with a regularization term
 *        that penalizes large weights. The penalty scales with the square of the
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param num_weights Number of updated weights.
 * \return new symbol
 */
inline Symbol multi_sgd_update(const std::string& symbol_name,
                               const std::vector<Symbol>& data,
                               nnvm::Tuple<mx_float> lrs,
                               nnvm::Tuple<mx_float> wds,
                               mx_float rescale_grad = 1,
                               mx_float clip_gradient = -1,
                               int num_weights = 1) {
  return Operator("multi_sgd_update")
           .SetParam("lrs", lrs)
           .SetParam("wds", wds)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("num_weights", num_weights)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Momentum update function for Stochastic Gradient Descent (SGD) optimizer.
 *
 *        Momentum update has better convergence rates on neural networks. Mathematically
 *        like below:
 *
 *        .. math::
 *
 *        v_1 = \alpha * \nabla J(W_0)\\
 *        v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
 *        W_t = W_{t-1} + v_t
 *
 *        It updates the weights using::
 *
 *        v = momentum * v - learning_rate * gradient
 *        weight += v
 *
 *        Where the parameter ``momentum`` is the decay rate of momentum estimates at
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L372
 * \param symbol_name name of the resulting symbol
 * \param data Weights, gradients and momentum
 * \param lrs Learning rates.
 * \param wds Weight decay augments the objective function with a regularization term
 *        that penalizes large weights. The penalty scales with the square of the
 * \param momentum The decay rate of momentum estimates at each epoch.
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param num_weights Number of updated weights.
 * \return new symbol
 */
inline Symbol multi_sgd_mom_update(const std::string& symbol_name,
                                   const std::vector<Symbol>& data,
                                   nnvm::Tuple<mx_float> lrs,
                                   nnvm::Tuple<mx_float> wds,
                                   mx_float momentum = 0,
                                   mx_float rescale_grad = 1,
                                   mx_float clip_gradient = -1,
                                   int num_weights = 1) {
  return Operator("multi_sgd_mom_update")
           .SetParam("lrs", lrs)
           .SetParam("wds", wds)
           .SetParam("momentum", momentum)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("num_weights", num_weights)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for multi-precision Stochastic Gradient Descent (SDG) optimizer.
 *
 *        It updates the weights using::
 *
 *        weight = weight - learning_rate * (gradient + wd * weight)
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L415
 * \param symbol_name name of the resulting symbol
 * \param data Weights
 * \param lrs Learning rates.
 * \param wds Weight decay augments the objective function with a regularization term
 *        that penalizes large weights. The penalty scales with the square of the
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param num_weights Number of updated weights.
 * \return new symbol
 */
inline Symbol multi_mp_sgd_update(const std::string& symbol_name,
                                  const std::vector<Symbol>& data,
                                  nnvm::Tuple<mx_float> lrs,
                                  nnvm::Tuple<mx_float> wds,
                                  mx_float rescale_grad = 1,
                                  mx_float clip_gradient = -1,
                                  int num_weights = 1) {
  return Operator("multi_mp_sgd_update")
           .SetParam("lrs", lrs)
           .SetParam("wds", wds)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("num_weights", num_weights)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Momentum update function for multi-precision Stochastic Gradient Descent (SGD)
 *
 *        Momentum update has better convergence rates on neural networks. Mathematically
 *        like below:
 *
 *        .. math::
 *
 *        v_1 = \alpha * \nabla J(W_0)\\
 *        v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
 *        W_t = W_{t-1} + v_t
 *
 *        It updates the weights using::
 *
 *        v = momentum * v - learning_rate * gradient
 *        weight += v
 *
 *        Where the parameter ``momentum`` is the decay rate of momentum estimates at
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L470
 * \param symbol_name name of the resulting symbol
 * \param data Weights
 * \param lrs Learning rates.
 * \param wds Weight decay augments the objective function with a regularization term
 *        that penalizes large weights. The penalty scales with the square of the
 * \param momentum The decay rate of momentum estimates at each epoch.
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param num_weights Number of updated weights.
 * \return new symbol
 */
inline Symbol multi_mp_sgd_mom_update(const std::string& symbol_name,
                                      const std::vector<Symbol>& data,
                                      nnvm::Tuple<mx_float> lrs,
                                      nnvm::Tuple<mx_float> wds,
                                      mx_float momentum = 0,
                                      mx_float rescale_grad = 1,
                                      mx_float clip_gradient = -1,
                                      int num_weights = 1) {
  return Operator("multi_mp_sgd_mom_update")
           .SetParam("lrs", lrs)
           .SetParam("wds", wds)
           .SetParam("momentum", momentum)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("num_weights", num_weights)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for Stochastic Gradient Descent (SGD) optimizer.
 *
 *        It updates the weights using::
 *
 *        weight = weight - learning_rate * (gradient + wd * weight)
 *
 *        However, if gradient is of ``row_sparse`` storage type and ``lazy_update`` is
 *        only the row slices whose indices appear in grad.indices are updated::
 *
 *        for row in gradient.indices:
 *        weight[row] = weight[row] - learning_rate * (gradient[row] + wd * weight[row])
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L522
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param lr Learning rate
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param lazy_update If true, lazy updates are applied if gradient's stype is row_sparse.
 * \return new symbol
 */
inline Symbol sgd_update(const std::string& symbol_name,
                         Symbol weight,
                         Symbol grad,
                         mx_float lr,
                         mx_float wd = 0,
                         mx_float rescale_grad = 1,
                         mx_float clip_gradient = -1,
                         bool lazy_update = true) {
  return Operator("sgd_update")
           .SetParam("lr", lr)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("lazy_update", lazy_update)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Momentum update function for Stochastic Gradient Descent (SGD) optimizer.
 *
 *        Momentum update has better convergence rates on neural networks. Mathematically
 *        like below:
 *
 *        .. math::
 *
 *        v_1 = \alpha * \nabla J(W_0)\\
 *        v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
 *        W_t = W_{t-1} + v_t
 *
 *        It updates the weights using::
 *
 *        v = momentum * v - learning_rate * gradient
 *        weight += v
 *
 *        Where the parameter ``momentum`` is the decay rate of momentum estimates at
 *
 *        However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True and
 *        type is the same as momentum's storage type,
 *        only the row slices whose indices appear in grad.indices are updated (for both
 *
 *        for row in gradient.indices:
 *        v[row] = momentum[row] * v[row] - learning_rate * gradient[row]
 *        weight[row] += v[row]
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L563
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mom Momentum
 * \param lr Learning rate
 * \param momentum The decay rate of momentum estimates at each epoch.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param lazy_update If true, lazy updates are applied if gradient's stype is row_sparse
 * \return new symbol
 */
inline Symbol sgd_mom_update(const std::string& symbol_name,
                             Symbol weight,
                             Symbol grad,
                             Symbol mom,
                             mx_float lr,
                             mx_float momentum = 0,
                             mx_float wd = 0,
                             mx_float rescale_grad = 1,
                             mx_float clip_gradient = -1,
                             bool lazy_update = true) {
  return Operator("sgd_mom_update")
           .SetParam("lr", lr)
           .SetParam("momentum", momentum)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("lazy_update", lazy_update)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mom", mom)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Updater function for multi-precision sgd optimizer
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad gradient
 * \param weight32 Weight32
 * \param lr Learning rate
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param lazy_update If true, lazy updates are applied if gradient's stype is row_sparse.
 * \return new symbol
 */
inline Symbol mp_sgd_update(const std::string& symbol_name,
                            Symbol weight,
                            Symbol grad,
                            Symbol weight32,
                            mx_float lr,
                            mx_float wd = 0,
                            mx_float rescale_grad = 1,
                            mx_float clip_gradient = -1,
                            bool lazy_update = true) {
  return Operator("mp_sgd_update")
           .SetParam("lr", lr)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("lazy_update", lazy_update)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("weight32", weight32)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Updater function for multi-precision sgd optimizer
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mom Momentum
 * \param weight32 Weight32
 * \param lr Learning rate
 * \param momentum The decay rate of momentum estimates at each epoch.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param lazy_update If true, lazy updates are applied if gradient's stype is row_sparse
 * \return new symbol
 */
inline Symbol mp_sgd_mom_update(const std::string& symbol_name,
                                Symbol weight,
                                Symbol grad,
                                Symbol mom,
                                Symbol weight32,
                                mx_float lr,
                                mx_float momentum = 0,
                                mx_float wd = 0,
                                mx_float rescale_grad = 1,
                                mx_float clip_gradient = -1,
                                bool lazy_update = true) {
  return Operator("mp_sgd_mom_update")
           .SetParam("lr", lr)
           .SetParam("momentum", momentum)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("lazy_update", lazy_update)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mom", mom)
           .SetInput("weight32", weight32)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief The FTML optimizer described in
 *        *FTML - Follow the Moving Leader in Deep Learning*,
 *        available at http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf.
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
 *        d_t = \frac{ 1 - \beta_1^t }{ \eta_t } (\sqrt{ \frac{ v_t }{ 1 - \beta_2^t } }
 *        \sigma_t = d_t - \beta_1 d_{t-1}
 *        z_t = \beta_1 z_{ t-1 } + (1 - \beta_1^t) g_t - \sigma_t W_{t-1}
 *        W_t = - \frac{ z_t }{ d_t }
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L638
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param d Internal state ``d_t``
 * \param v Internal state ``v_t``
 * \param z Internal state ``z_t``
 * \param lr Learning rate.
 * \param t Number of update.
 * \param beta1 Generally close to 0.5.
 * \param beta2 Generally close to 1.
 * \param epsilon Epsilon to prevent div 0.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_grad Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol ftml_update(const std::string& symbol_name,
                          Symbol weight,
                          Symbol grad,
                          Symbol d,
                          Symbol v,
                          Symbol z,
                          mx_float lr,
                          int t,
                          mx_float beta1 = 0.600000024,
                          mx_float beta2 = 0.999000013,
                          double epsilon = 9.9999999392252903e-09,
                          mx_float wd = 0,
                          mx_float rescale_grad = 1,
                          mx_float clip_grad = -1) {
  return Operator("ftml_update")
           .SetParam("lr", lr)
           .SetParam("t", t)
           .SetParam("beta1", beta1)
           .SetParam("beta2", beta2)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_grad", clip_grad)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("d", d)
           .SetInput("v", v)
           .SetInput("z", z)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for Adam optimizer. Adam is seen as a generalization
 *        of AdaGrad.
 *
 *        Adam update consists of the following steps, where g represents gradient and m,
 *        are 1st and 2nd order moment estimates (mean and variance).
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
 *        v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
 *        W_t = W_{t-1} - \alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon }
 *
 *        It updates the weights using::
 *
 *        m = beta1*m + (1-beta1)*grad
 *        v = beta2*v + (1-beta2)*(grad**2)
 *        w += - learning_rate * m / (sqrt(v) + epsilon)
 *
 *        However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True and
 *        type of weight is the same as those of m and v,
 *        only the row slices whose indices appear in grad.indices are updated (for w, m
 *
 *        for row in grad.indices:
 *        m[row] = beta1*m[row] + (1-beta1)*grad[row]
 *        v[row] = beta2*v[row] + (1-beta2)*(grad[row]**2)
 *        w[row] += - learning_rate * m[row] / (sqrt(v[row]) + epsilon)
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L686
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mean Moving mean
 * \param var Moving variance
 * \param lr Learning rate
 * \param beta1 The decay rate for the 1st moment estimates.
 * \param beta2 The decay rate for the 2nd moment estimates.
 * \param epsilon A small constant for numerical stability.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param lazy_update If true, lazy updates are applied if gradient's stype is row_sparse
 * \return new symbol
 */
inline Symbol adam_update(const std::string& symbol_name,
                          Symbol weight,
                          Symbol grad,
                          Symbol mean,
                          Symbol var,
                          mx_float lr,
                          mx_float beta1 = 0.899999976,
                          mx_float beta2 = 0.999000013,
                          mx_float epsilon = 9.99999994e-09,
                          mx_float wd = 0,
                          mx_float rescale_grad = 1,
                          mx_float clip_gradient = -1,
                          bool lazy_update = true) {
  return Operator("adam_update")
           .SetParam("lr", lr)
           .SetParam("beta1", beta1)
           .SetParam("beta2", beta2)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("lazy_update", lazy_update)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mean", mean)
           .SetInput("var", var)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for Nesterov Accelerated Gradient( NAG) optimizer.
 *        It updates the weights using the following formula,
 *
 *        .. math::
 *        v_t = \gamma v_{t-1} + \eta * \nabla J(W_{t-1} - \gamma v_{t-1})\\
 *        W_t = W_{t-1} - v_t
 *
 *        Where
 *        :math:`\eta` is the learning rate of the optimizer
 *        :math:`\gamma` is the decay rate of the momentum estimate
 *        :math:`\v_t` is the update vector at time step `t`
 *        :math:`\W_t` is the weight vector at time step `t`
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L724
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mom Momentum
 * \param lr Learning rate
 * \param momentum The decay rate of momentum estimates at each epoch.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol nag_mom_update(const std::string& symbol_name,
                             Symbol weight,
                             Symbol grad,
                             Symbol mom,
                             mx_float lr,
                             mx_float momentum = 0,
                             mx_float wd = 0,
                             mx_float rescale_grad = 1,
                             mx_float clip_gradient = -1) {
  return Operator("nag_mom_update")
           .SetParam("lr", lr)
           .SetParam("momentum", momentum)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mom", mom)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for multi-precision Nesterov Accelerated Gradient( NAG)
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L743
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param mom Momentum
 * \param weight32 Weight32
 * \param lr Learning rate
 * \param momentum The decay rate of momentum estimates at each epoch.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol mp_nag_mom_update(const std::string& symbol_name,
                                Symbol weight,
                                Symbol grad,
                                Symbol mom,
                                Symbol weight32,
                                mx_float lr,
                                mx_float momentum = 0,
                                mx_float wd = 0,
                                mx_float rescale_grad = 1,
                                mx_float clip_gradient = -1) {
  return Operator("mp_nag_mom_update")
           .SetParam("lr", lr)
           .SetParam("momentum", momentum)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mom", mom)
           .SetInput("weight32", weight32)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for `RMSProp` optimizer.
 *
 *        `RMSprop` is a variant of stochastic gradient descent where the gradients are
 *        divided by a cache which grows with the sum of squares of recent gradients?
 *
 *        `RMSProp` is similar to `AdaGrad`, a popular variant of `SGD` which adaptively
 *        tunes the learning rate of each parameter. `AdaGrad` lowers the learning rate
 *        each parameter monotonically over the course of training.
 *        While this is analytically motivated for convex optimizations, it may not be
 *        for non-convex problems. `RMSProp` deals with this heuristically by allowing the
 *        learning rates to rebound as the denominator decays over time.
 *
 *        Define the Root Mean Square (RMS) error criterion of the gradient as
 *        :math:`RMS[g]_t = \sqrt{E[g^2]_t + \epsilon}`, where :math:`g` represents
 *        gradient and :math:`E[g^2]_t` is the decaying average over past squared
 *
 *        The :math:`E[g^2]_t` is given by:
 *
 *        .. math::
 *        E[g^2]_t = \gamma * E[g^2]_{t-1} + (1-\gamma) * g_t^2
 *
 *        The update step is
 *
 *        .. math::
 *        \theta_{t+1} = \theta_t - \frac{\eta}{RMS[g]_t} g_t
 *
 *        The RMSProp code follows the version in
 *        http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
 *        Tieleman & Hinton, 2012.
 *
 *        Hinton suggests the momentum term :math:`\gamma` to be 0.9 and the learning rate
 *        :math:`\eta` to be 0.001.
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L795
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param n n
 * \param lr Learning rate
 * \param gamma1 The decay rate of momentum estimates.
 * \param epsilon A small constant for numerical stability.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param clip_weights Clip weights to the range of [-clip_weights, clip_weights] If
 *        clip_weights <= 0, weight clipping is turned off. weights = max(min(weights,
 * \return new symbol
 */
inline Symbol rmsprop_update(const std::string& symbol_name,
                             Symbol weight,
                             Symbol grad,
                             Symbol n,
                             mx_float lr,
                             mx_float gamma1 = 0.949999988,
                             mx_float epsilon = 9.99999994e-09,
                             mx_float wd = 0,
                             mx_float rescale_grad = 1,
                             mx_float clip_gradient = -1,
                             mx_float clip_weights = -1) {
  return Operator("rmsprop_update")
           .SetParam("lr", lr)
           .SetParam("gamma1", gamma1)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("clip_weights", clip_weights)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("n", n)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for RMSPropAlex optimizer.
 *
 *        `RMSPropAlex` is non-centered version of `RMSProp`.
 *
 *        Define :math:`E[g^2]_t` is the decaying average over past squared gradient and
 *        :math:`E[g]_t` is the decaying average over past gradient.
 *
 *        .. math::
 *        E[g^2]_t = \gamma_1 * E[g^2]_{t-1} + (1 - \gamma_1) * g_t^2\\
 *        E[g]_t = \gamma_1 * E[g]_{t-1} + (1 - \gamma_1) * g_t\\
 *        \Delta_t = \gamma_2 * \Delta_{t-1} - \frac{\eta}{\sqrt{E[g^2]_t - E[g]_t^2 +
 *
 *        The update step is
 *
 *        .. math::
 *        \theta_{t+1} = \theta_t + \Delta_t
 *
 *        The RMSPropAlex code follows the version in
 *        http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.
 *
 *        Graves suggests the momentum term :math:`\gamma_1` to be 0.95, :math:`\gamma_2`
 *        to be 0.9 and the learning rate :math:`\eta` to be 0.0001.
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L834
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param n n
 * \param g g
 * \param delta delta
 * \param lr Learning rate
 * \param gamma1 Decay rate.
 * \param gamma2 Decay rate.
 * \param epsilon A small constant for numerical stability.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param clip_weights Clip weights to the range of [-clip_weights, clip_weights] If
 *        clip_weights <= 0, weight clipping is turned off. weights = max(min(weights,
 * \return new symbol
 */
inline Symbol rmspropalex_update(const std::string& symbol_name,
                                 Symbol weight,
                                 Symbol grad,
                                 Symbol n,
                                 Symbol g,
                                 Symbol delta,
                                 mx_float lr,
                                 mx_float gamma1 = 0.949999988,
                                 mx_float gamma2 = 0.899999976,
                                 mx_float epsilon = 9.99999994e-09,
                                 mx_float wd = 0,
                                 mx_float rescale_grad = 1,
                                 mx_float clip_gradient = -1,
                                 mx_float clip_weights = -1) {
  return Operator("rmspropalex_update")
           .SetParam("lr", lr)
           .SetParam("gamma1", gamma1)
           .SetParam("gamma2", gamma2)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("clip_weights", clip_weights)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("n", n)
           .SetInput("g", g)
           .SetInput("delta", delta)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for Ftrl optimizer.
 *        Referenced from *Ad Click Prediction: a View from the Trenches*, available at
 *        http://dl.acm.org/citation.cfm?id=2488200.
 *
 *        It updates the weights using::
 *
 *        rescaled_grad = clip(grad * rescale_grad, clip_gradient)
 *        z += rescaled_grad - (sqrt(n + rescaled_grad**2) - sqrt(n)) * weight /
 *        n += rescaled_grad**2
 *        w = (sign(z) * lamda1 - z) / ((beta + sqrt(n)) / learning_rate + wd) * (abs(z)
 *
 *        If w, z and n are all of ``row_sparse`` storage type,
 *        only the row slices whose indices appear in grad.indices are updated (for w, z
 *
 *        for row in grad.indices:
 *        rescaled_grad[row] = clip(grad[row] * rescale_grad, clip_gradient)
 *        z[row] += rescaled_grad[row] - (sqrt(n[row] + rescaled_grad[row]**2) -
 *        n[row] += rescaled_grad[row]**2
 *        w[row] = (sign(z[row]) * lamda1 - z[row]) / ((beta + sqrt(n[row])) /
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L874
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param z z
 * \param n Square of grad
 * \param lr Learning rate
 * \param lamda1 The L1 regularization coefficient.
 * \param beta Per-Coordinate Learning Rate beta.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol ftrl_update(const std::string& symbol_name,
                          Symbol weight,
                          Symbol grad,
                          Symbol z,
                          Symbol n,
                          mx_float lr,
                          mx_float lamda1 = 0.00999999978,
                          mx_float beta = 1,
                          mx_float wd = 0,
                          mx_float rescale_grad = 1,
                          mx_float clip_gradient = -1) {
  return Operator("ftrl_update")
           .SetParam("lr", lr)
           .SetParam("lamda1", lamda1)
           .SetParam("beta", beta)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("z", z)
           .SetInput("n", n)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Update function for AdaGrad optimizer.
 *
 *        Referenced from *Adaptive Subgradient Methods for Online Learning and
 *        and available at http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf.
 *
 *        Updates are applied by::
 *
 *        rescaled_grad = clip(grad * rescale_grad, clip_gradient)
 *        history = history + square(rescaled_grad)
 *        w = w - learning_rate * rescaled_grad / sqrt(history + epsilon)
 *
 *        Note that non-zero values for the weight decay option are not supported.
 *
 *
 *
 *        Defined in src/operator/optimizer_op.cc:L907
 * \param symbol_name name of the resulting symbol
 * \param weight Weight
 * \param grad Gradient
 * \param history History
 * \param lr Learning rate
 * \param epsilon epsilon
 * \param wd weight decay
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol _sparse_adagrad_update(const std::string& symbol_name,
                                     Symbol weight,
                                     Symbol grad,
                                     Symbol history,
                                     mx_float lr,
                                     mx_float epsilon = 1.00000001e-07,
                                     mx_float wd = 0,
                                     mx_float rescale_grad = 1,
                                     mx_float clip_gradient = -1) {
  return Operator("_sparse_adagrad_update")
           .SetParam("lr", lr)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("history", history)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Run a for loop over an NDArray with user-defined computation
 *
 *        From:src/operator/control_flow.cc:1090
 * \param symbol_name name of the resulting symbol
 * \param fn Input graph.
 * \param data The input arrays that include data arrays and states.
 * \param num_args Number of inputs.
 * \param num_outputs The number of outputs of the subgraph.
 * \param num_out_data The number of output data of the subgraph.
 * \param in_state_locs The locations of loop states among the inputs.
 * \param in_data_locs The locations of input data among the inputs.
 * \param remain_locs The locations of remaining data among the inputs.
 * \return new symbol
 */
inline Symbol _foreach(const std::string& symbol_name,
                       Symbol fn,
                       const std::vector<Symbol>& data,
                       int num_args,
                       int num_outputs,
                       int num_out_data,
                       nnvm::Tuple<int64_t> in_state_locs,
                       nnvm::Tuple<int64_t> in_data_locs,
                       nnvm::Tuple<int64_t> remain_locs) {
  return Operator("_foreach")
           .SetParam("num_args", num_args)
           .SetParam("num_outputs", num_outputs)
           .SetParam("num_out_data", num_out_data)
           .SetParam("in_state_locs", in_state_locs)
           .SetParam("in_data_locs", in_data_locs)
           .SetParam("remain_locs", remain_locs)
           .SetInput("fn", fn)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Run a while loop over with user-defined condition and computation
 *
 *        From:src/operator/control_flow.cc:1151
 * \param symbol_name name of the resulting symbol
 * \param cond Input graph for the loop condition.
 * \param func Input graph for the loop body.
 * \param data The input arrays that include data arrays and states.
 * \param num_args Number of input arguments, including cond and func as two symbol
 * \param num_outputs The number of outputs of the subgraph.
 * \param num_out_data The number of outputs from the function body.
 * \param max_iterations Maximum number of iterations.
 * \param cond_input_locs The locations of cond's inputs in the given inputs.
 * \param func_input_locs The locations of func's inputs in the given inputs.
 * \param func_var_locs The locations of loop_vars among func's inputs.
 * \return new symbol
 */
inline Symbol _while_loop(const std::string& symbol_name,
                          Symbol cond,
                          Symbol func,
                          const std::vector<Symbol>& data,
                          int num_args,
                          int num_outputs,
                          int num_out_data,
                          int max_iterations,
                          nnvm::Tuple<int64_t> cond_input_locs,
                          nnvm::Tuple<int64_t> func_input_locs,
                          nnvm::Tuple<int64_t> func_var_locs) {
  return Operator("_while_loop")
           .SetParam("num_args", num_args)
           .SetParam("num_outputs", num_outputs)
           .SetParam("num_out_data", num_out_data)
           .SetParam("max_iterations", max_iterations)
           .SetParam("cond_input_locs", cond_input_locs)
           .SetParam("func_input_locs", func_input_locs)
           .SetParam("func_var_locs", func_var_locs)
           .SetInput("cond", cond)
           .SetInput("func", func)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Run a if-then-else using user-defined condition and computation
 *
 *        From:src/operator/control_flow.cc:1212
 * \param symbol_name name of the resulting symbol
 * \param cond Input graph for the condition.
 * \param then_branch Input graph for the then branch.
 * \param else_branch Input graph for the else branch.
 * \param data The input arrays that include data arrays and states.
 * \param num_args Number of input arguments, including cond, then and else as three
 * \param num_outputs The number of outputs of the subgraph.
 * \param cond_input_locs The locations of cond's inputs in the given inputs.
 * \param then_input_locs The locations of then's inputs in the given inputs.
 * \param else_input_locs The locations of else's inputs in the given inputs.
 * \return new symbol
 */
inline Symbol _cond(const std::string& symbol_name,
                    Symbol cond,
                    Symbol then_branch,
                    Symbol else_branch,
                    const std::vector<Symbol>& data,
                    int num_args,
                    int num_outputs,
                    nnvm::Tuple<int64_t> cond_input_locs,
                    nnvm::Tuple<int64_t> then_input_locs,
                    nnvm::Tuple<int64_t> else_input_locs) {
  return Operator("_cond")
           .SetParam("num_args", num_args)
           .SetParam("num_outputs", num_outputs)
           .SetParam("cond_input_locs", cond_input_locs)
           .SetParam("then_input_locs", then_input_locs)
           .SetParam("else_input_locs", else_input_locs)
           .SetInput("cond", cond)
           .SetInput("then_branch", then_branch)
           .SetInput("else_branch", else_branch)
(data)
           .CreateSymbol(symbol_name);
}

/*! \brief Normalizes the gradient.
 */
enum class SoftmaxOutputNormalization {
  kBatch = 0,
  kNull = 1,
  kValid = 2
};

/*!
 * \brief Computes the gradient of cross entropy loss with respect to softmax output.
 *
 *        - This operator computes the gradient in two steps.
 *        The cross entropy loss does not actually need to be computed.
 *
 *        - Applies softmax function on the input array.
 *        - Computes and returns the gradient of cross entropy loss w.r.t. the softmax
 *
 *        - The softmax function, cross entropy loss and gradient is given by:
 *
 *        - Softmax Function:
 *
 *        .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}
 *
 *        - Cross Entropy Function:
 *
 *        .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i
 *
 *        - The gradient of cross entropy loss w.r.t softmax output:
 *
 *        .. math:: \text{gradient} = \text{output} - \text{label}
 *
 *        - During forward propagation, the softmax function is computed for each
 *
 *        For general *N*-D input arrays with shape :math:`(d_1, d_2, ..., d_n)`. The
 *        :math:`s=d_1 \cdot d_2 \cdot \cdot \cdot d_n`. We can use the parameters
 *        and `multi_output` to specify the way to compute softmax:
 *
 *        - By default, `preserve_shape` is ``false``. This operator will reshape the
 *        into a 2-D array with shape :math:`(d_1, \frac{s}{d_1})` and then compute the
 *        each row in the reshaped array, and afterwards reshape it back to the original
 *        :math:`(d_1, d_2, ..., d_n)`.
 *        - If `preserve_shape` is ``true``, the softmax function will be computed along
 *        the last axis (`axis` = ``-1``).
 *        - If `multi_output` is ``true``, the softmax function will be computed along
 *        the second axis (`axis` = ``1``).
 *
 *        - During backward propagation, the gradient of cross-entropy loss w.r.t softmax
 *        The provided label can be a one-hot label array or a probability label array.
 *
 *        - If the parameter `use_ignore` is ``true``, `ignore_label` can specify input
 *        with a particular label to be ignored during backward propagation. **This has
 *        softmax `output` has same shape as `label`**.
 *
 *        Example::
 *
 *        data = [[1,2,3,4],[2,2,2,2],[3,3,3,3],[4,4,4,4]]
 *        label = [1,0,2,3]
 *        ignore_label = 1
 *        SoftmaxOutput(data=data, label = label,\
 *        multi_output=true, use_ignore=true,\
 *        ignore_label=ignore_label)
 *        ## forward softmax output
 *        [[ 0.0320586   0.08714432  0.23688284  0.64391428]
 *        [ 0.25        0.25        0.25        0.25      ]
 *        [ 0.25        0.25        0.25        0.25      ]
 *        [ 0.25        0.25        0.25        0.25      ]]
 *        ## backward gradient output
 *        [[ 0.    0.    0.    0.  ]
 *        [-0.75  0.25  0.25  0.25]
 *        [ 0.25  0.25 -0.75  0.25]
 *        [ 0.25  0.25  0.25 -0.75]]
 *        ## notice that the first row is all 0 because label[0] is 1, which is equal to
 *
 *        - The parameter `grad_scale` can be used to rescale the gradient, which is
 *        give each loss function different weights.
 *
 *        - This operator also supports various ways to normalize the gradient by
 *        The `normalization` is applied if softmax output has different shape than the
 *        The `normalization` mode can be set to the followings:
 *
 *        - ``'null'``: do nothing.
 *        - ``'batch'``: divide the gradient by the batch size.
 *        - ``'valid'``: divide the gradient by the number of instances which are not
 *
 *
 *
 *        Defined in src/operator/softmax_output.cc:L230
 * \param symbol_name name of the resulting symbol
 * \param data Input array.
 * \param label Ground truth label.
 * \param grad_scale Scales the gradient by a float factor.
 * \param ignore_label The instances whose `labels` == `ignore_label` will be ignored
 * \param multi_output If set to ``true``, the softmax function will be computed along
 *        axis ``1``. This is applied when the shape of input array differs from the
 * \param use_ignore If set to ``true``, the `ignore_label` value will not contribute to
 * \param preserve_shape If set to ``true``, the softmax function will be computed along
 * \param normalization Normalizes the gradient.
 * \param out_grad Multiplies gradient with output gradient element-wise.
 * \param smooth_alpha Constant for computing a label smoothed version of
 *        cross-entropyfor the backwards pass.  This constant gets subtracted from
 *        theone-hot encoding of the gold label and distributed uniformly toall other
 * \return new symbol
 */
inline Symbol SoftmaxOutput(const std::string& symbol_name,
                            Symbol data,
                            Symbol label,
                            mx_float grad_scale = 1,
                            mx_float ignore_label = -1,
                            bool multi_output = false,
                            bool use_ignore = false,
                            bool preserve_shape = false,
                            SoftmaxOutputNormalization normalization = SoftmaxOutputNormalization::kNull,
                            bool out_grad = false,
                            mx_float smooth_alpha = 0) {
  static const char *SoftmaxOutputNormalizationValues[] = {
    "batch",
    "null",
    "valid"
  };
  return Operator("SoftmaxOutput")
           .SetParam("grad_scale", grad_scale)
           .SetParam("ignore_label", ignore_label)
           .SetParam("multi_output", multi_output)
           .SetParam("use_ignore", use_ignore)
           .SetParam("preserve_shape", preserve_shape)
           .SetParam("normalization", SoftmaxOutputNormalizationValues[int(normalization)])
           .SetParam("out_grad", out_grad)
           .SetParam("smooth_alpha", smooth_alpha)
           .SetInput("data", data)
           .SetInput("label", label)
           .CreateSymbol(symbol_name);
}

/*! \brief Activation function to be applied.
 */
enum class LeakyReLUActType {
  kElu = 0,
  kGelu = 1,
  kLeaky = 2,
  kPrelu = 3,
  kRrelu = 4,
  kSelu = 5
};

/*!
 * \brief Applies Leaky rectified linear unit activation element-wise to the input.
 *
 *        Leaky ReLUs attempt to fix the "dying ReLU" problem by allowing a small `slope`
 *        when the input is negative and has a slope of one when input is positive.
 *
 *        The following modified ReLU Activation functions are supported:
 *
 *        - *elu*: Exponential Linear Unit. `y = x > 0 ? x : slope * (exp(x)-1)`
 *        - *selu*: Scaled Exponential Linear Unit. `y = lambda * (x > 0 ? x : alpha *
 *        *lambda = 1.0507009873554804934193349852946* and *alpha =
 *        - *leaky*: Leaky ReLU. `y = x > 0 ? x : slope * x`
 *        - *prelu*: Parametric ReLU. This is same as *leaky* except that `slope` is
 *        - *rrelu*: Randomized ReLU. same as *leaky* but the `slope` is uniformly and
 *        *[lower_bound, upper_bound)* for training, while fixed to be
 *        *(lower_bound+upper_bound)/2* for inference.
 *
 *
 *
 *        Defined in src/operator/leaky_relu.cc:L65
 * \param symbol_name name of the resulting symbol
 * \param data Input data to activation function.
 * \param gamma Slope parameter for PReLU. Only required when act_type is 'prelu'. It
 *        should be either a vector of size 1, or the same size as the second dimension
 * \param act_type Activation function to be applied.
 * \param slope Init slope for the activation. (For leaky and elu only)
 * \param lower_bound Lower bound of random slope. (For rrelu only)
 * \param upper_bound Upper bound of random slope. (For rrelu only)
 * \return new symbol
 */
inline Symbol LeakyReLU(const std::string& symbol_name,
                        Symbol data,
                        Symbol gamma,
                        LeakyReLUActType act_type = LeakyReLUActType::kLeaky,
                        mx_float slope = 0.25,
                        mx_float lower_bound = 0.125,
                        mx_float upper_bound = 0.333999991) {
  static const char *LeakyReLUActTypeValues[] = {
    "elu",
    "gelu",
    "leaky",
    "prelu",
    "rrelu",
    "selu"
  };
  return Operator("LeakyReLU")
           .SetParam("act_type", LeakyReLUActTypeValues[int(act_type)])
           .SetParam("slope", slope)
           .SetParam("lower_bound", lower_bound)
           .SetParam("upper_bound", upper_bound)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes and optimizes for squared loss during backward propagation.
 *        Just outputs ``data`` during forward propagation.
 *
 *        If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i`
 *        then the squared loss estimated over :math:`n` samples is defined as
 *
 *        :math:`\text{SquaredLoss}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n}
 *
 *        .. note::
 *        Use the LinearRegressionOutput as the final output layer of a net.
 *
 *        The storage type of ``label`` can be ``default`` or ``csr``
 *
 *        - LinearRegressionOutput(default, default) = default
 *        - LinearRegressionOutput(default, csr) = default
 *
 *        By default, gradients of this loss function are scaled by factor `1/m`, where m
 *        The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.
 *
 *
 *
 *        Defined in src/operator/regression_output.cc:L92
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the function.
 * \param label Input label to the function.
 * \param grad_scale Scale the gradient by a float factor
 * \return new symbol
 */
inline Symbol LinearRegressionOutput(const std::string& symbol_name,
                                     Symbol data,
                                     Symbol label,
                                     mx_float grad_scale = 1) {
  return Operator("LinearRegressionOutput")
           .SetParam("grad_scale", grad_scale)
           .SetInput("data", data)
           .SetInput("label", label)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes mean absolute error of the input.
 *
 *        MAE is a risk metric corresponding to the expected value of the absolute error.
 *
 *        If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i`
 *        then the mean absolute error (MAE) estimated over :math:`n` samples is defined
 *
 *        :math:`\text{MAE}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1}
 *
 *        .. note::
 *        Use the MAERegressionOutput as the final output layer of a net.
 *
 *        The storage type of ``label`` can be ``default`` or ``csr``
 *
 *        - MAERegressionOutput(default, default) = default
 *        - MAERegressionOutput(default, csr) = default
 *
 *        By default, gradients of this loss function are scaled by factor `1/m`, where m
 *        The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.
 *
 *
 *
 *        Defined in src/operator/regression_output.cc:L120
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the function.
 * \param label Input label to the function.
 * \param grad_scale Scale the gradient by a float factor
 * \return new symbol
 */
inline Symbol MAERegressionOutput(const std::string& symbol_name,
                                  Symbol data,
                                  Symbol label,
                                  mx_float grad_scale = 1) {
  return Operator("MAERegressionOutput")
           .SetParam("grad_scale", grad_scale)
           .SetInput("data", data)
           .SetInput("label", label)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Applies a logistic function to the input.
 *
 *        The logistic function, also known as the sigmoid function, is computed as
 *        :math:`\frac{1}{1+exp(-\textbf{x})}`.
 *
 *        Commonly, the sigmoid is used to squash the real-valued output of a linear model
 *        :math:`wTx+b` into the [0,1] range so that it can be interpreted as a
 *        It is suitable for binary classification or probability prediction tasks.
 *
 *        .. note::
 *        Use the LogisticRegressionOutput as the final output layer of a net.
 *
 *        The storage type of ``label`` can be ``default`` or ``csr``
 *
 *        - LogisticRegressionOutput(default, default) = default
 *        - LogisticRegressionOutput(default, csr) = default
 *
 *        The loss function used is the Binary Cross Entropy Loss:
 *
 *        :math:`-{(y\log(p) + (1 - y)\log(1 - p))}`
 *
 *        Where `y` is the ground truth probability of positive outcome for a given
 *        example, and `p` the probability predicted by the model. By default, gradients
 *        of this loss function are scaled by factor `1/m`, where m is the number of
 *        The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.
 *
 *
 *
 *        Defined in src/operator/regression_output.cc:L152
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the function.
 * \param label Input label to the function.
 * \param grad_scale Scale the gradient by a float factor
 * \return new symbol
 */
inline Symbol LogisticRegressionOutput(const std::string& symbol_name,
                                       Symbol data,
                                       Symbol label,
                                       mx_float grad_scale = 1) {
  return Operator("LogisticRegressionOutput")
           .SetParam("grad_scale", grad_scale)
           .SetInput("data", data)
           .SetInput("label", label)
           .CreateSymbol(symbol_name);
}

/*! \brief Padding type to use. "constant" pads with `constant_value` "edge" pads using
 *        the edge values of the input array "reflect" pads by reflecting values with
 */
enum class PadMode {
  kConstant = 0,
  kEdge = 1,
  kReflect = 2
};

/*!
 * \brief Pads an input array with a constant or edge values of the array.
 *
 *        .. note:: `Pad` is deprecated. Use `pad` instead.
 *
 *        .. note:: Current implementation only supports 4D and 5D input arrays with
 *        only on axes 1, 2 and 3. Expects axes 4 and 5 in `pad_width` to be zero.
 *
 *        This operation pads an input array with either a `constant_value` or edge values
 *        along each axis of the input array. The amount of padding is specified by
 *
 *        `pad_width` is a tuple of integer padding widths for each axis of the format
 *        ``(before_1, after_1, ... , before_N, after_N)``. The `pad_width` should be of
 *        where ``N`` is the number of dimensions of the array.
 *
 *        For dimension ``N`` of the input array, ``before_N`` and ``after_N`` indicates
 *        to add before and after the elements of the array along dimension ``N``.
 *        The widths of the higher two dimensions ``before_1``, ``after_1``, ``before_2``,
 *        ``after_2`` must be 0.
 *
 *        Example::
 *
 *        x = [[[[  1.   2.   3.]
 *        [  4.   5.   6.]]
 *
 *        [[  7.   8.   9.]
 *        [ 10.  11.  12.]]]
 *
 *
 *        [[[ 11.  12.  13.]
 *        [ 14.  15.  16.]]
 *
 *        [[ 17.  18.  19.]
 *        [ 20.  21.  22.]]]]
 *
 *        pad(x,mode="edge", pad_width=(0,0,0,0,1,1,1,1)) =
 *
 *        [[[[  1.   1.   2.   3.   3.]
 *        [  1.   1.   2.   3.   3.]
 *        [  4.   4.   5.   6.   6.]
 *        [  4.   4.   5.   6.   6.]]
 *
 *        [[  7.   7.   8.   9.   9.]
 *        [  7.   7.   8.   9.   9.]
 *        [ 10.  10.  11.  12.  12.]
 *        [ 10.  10.  11.  12.  12.]]]
 *
 *
 *        [[[ 11.  11.  12.  13.  13.]
 *        [ 11.  11.  12.  13.  13.]
 *        [ 14.  14.  15.  16.  16.]
 *        [ 14.  14.  15.  16.  16.]]
 *
 *        [[ 17.  17.  18.  19.  19.]
 *        [ 17.  17.  18.  19.  19.]
 *        [ 20.  20.  21.  22.  22.]
 *        [ 20.  20.  21.  22.  22.]]]]
 *
 *        pad(x, mode="constant", constant_value=0, pad_width=(0,0,0,0,1,1,1,1)) =
 *
 *        [[[[  0.   0.   0.   0.   0.]
 *        [  0.   1.   2.   3.   0.]
 *        [  0.   4.   5.   6.   0.]
 *        [  0.   0.   0.   0.   0.]]
 *
 *        [[  0.   0.   0.   0.   0.]
 *        [  0.   7.   8.   9.   0.]
 *        [  0.  10.  11.  12.   0.]
 *        [  0.   0.   0.   0.   0.]]]
 *
 *
 *        [[[  0.   0.   0.   0.   0.]
 *        [  0.  11.  12.  13.   0.]
 *        [  0.  14.  15.  16.   0.]
 *        [  0.   0.   0.   0.   0.]]
 *
 *        [[  0.   0.   0.   0.   0.]
 *        [  0.  17.  18.  19.   0.]
 *        [  0.  20.  21.  22.   0.]
 *        [  0.   0.   0.   0.   0.]]]]
 *
 *
 *
 *
 *        Defined in src/operator/pad.cc:L766
 * \param symbol_name name of the resulting symbol
 * \param data An n-dimensional input array.
 * \param mode Padding type to use. "constant" pads with `constant_value` "edge" pads
 *        using the edge values of the input array "reflect" pads by reflecting values
 * \param pad_width Widths of the padding regions applied to the edges of each axis. It
 *        is a tuple of integer padding widths for each axis of the format ``(before_1,
 *        after_1, ... , before_N, after_N)``. It should be of length ``2*N`` where ``N``
 *        is the number of dimensions of the array.This is equivalent to pad_width in
 * \param constant_value The value used for padding when `mode` is "constant".
 * \return new symbol
 */
inline Symbol Pad(const std::string& symbol_name,
                  Symbol data,
                  PadMode mode,
                  Shape pad_width,
                  double constant_value = 0) {
  static const char *PadModeValues[] = {
    "constant",
    "edge",
    "reflect"
  };
  return Operator("Pad")
           .SetParam("mode", PadModeValues[int(mode)])
           .SetParam("pad_width", pad_width)
           .SetParam("constant_value", constant_value)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Apply a sparse regularization to the output a sigmoid activation function.
 * \param symbol_name name of the resulting symbol
 * \param data Input data.
 * \param sparseness_target The sparseness target
 * \param penalty The tradeoff parameter for the sparseness penalty
 * \param momentum The momentum for running average
 * \return new symbol
 */
inline Symbol IdentityAttachKLSparseReg(const std::string& symbol_name,
                                        Symbol data,
                                        mx_float sparseness_target = 0.100000001,
                                        mx_float penalty = 0.00100000005,
                                        mx_float momentum = 0.899999976) {
  return Operator("IdentityAttachKLSparseReg")
           .SetParam("sparseness_target", sparseness_target)
           .SetParam("penalty", penalty)
           .SetParam("momentum", momentum)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Batch normalization.
 *
 *        This operator is DEPRECATED. Perform BatchNorm on the input.
 *
 *        Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
 *        well as offset ``beta``.
 *
 *        Assume the input has more than one dimension and we normalize along axis 1.
 *        We first compute the mean and variance along this axis:
 *
 *        .. math::
 *
 *        data\_mean[i] = mean(data[:,i,:,...]) \\
 *        data\_var[i] = var(data[:,i,:,...])
 *
 *        Then compute the normalized output, which has the same shape as input, as
 *
 *        .. math::
 *
 *        out[:,i,:,...] = \frac{data[:,i,:,...] -
 *
 *        Both *mean* and *var* returns a scalar by treating the input as a vector.
 *
 *        Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 *        have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
 *        ``data_var`` as well, which are needed for the backward pass.
 *
 *        Besides the inputs and the outputs, this operator accepts two auxiliary
 *        states, ``moving_mean`` and ``moving_var``, which are *k*-length
 *        vectors. They are global statistics for the whole dataset, which are updated
 *        by::
 *
 *        moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
 *        moving_var = moving_var * momentum + data_var * (1 - momentum)
 *
 *        If ``use_global_stats`` is set to be true, then ``moving_mean`` and
 *        ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
 *        the output. It is often used during inference.
 *
 *        Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
 *        then set ``gamma`` to 1 and its gradient to 0.
 *
 *        There's no sparse support for this operator, and it will exhibit problematic
 *        sparse tensors.
 *
 *
 *
 *        Defined in src/operator/batch_norm_v1.cc:L95
 * \param symbol_name name of the resulting symbol
 * \param data Input data to batch normalization
 * \param gamma gamma array
 * \param beta beta array
 * \param eps Epsilon to prevent div 0
 * \param momentum Momentum for moving average
 * \param fix_gamma Fix gamma while training
 * \param use_global_stats Whether use global moving statistics instead of local
 * \param output_mean_var Output All,normal mean and var
 * \return new symbol
 */
inline Symbol BatchNorm_v1(const std::string& symbol_name,
                           Symbol data,
                           Symbol gamma,
                           Symbol beta,
                           mx_float eps = 0.00100000005,
                           mx_float momentum = 0.899999976,
                           bool fix_gamma = true,
                           bool use_global_stats = false,
                           bool output_mean_var = false) {
  return Operator("BatchNorm_v1")
           .SetParam("eps", eps)
           .SetParam("momentum", momentum)
           .SetParam("fix_gamma", fix_gamma)
           .SetParam("use_global_stats", use_global_stats)
           .SetParam("output_mean_var", output_mean_var)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .SetInput("beta", beta)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Splits an array along a particular axis into multiple sub-arrays.
 *
 *        .. note:: ``SliceChannel`` is deprecated. Use ``split`` instead.
 *
 *        **Note** that `num_outputs` should evenly divide the length of the axis
 *        along which to split the array.
 *
 *        Example::
 *
 *        x  = [[[ 1.]
 *        [ 2.]]
 *        [[ 3.]
 *        [ 4.]]
 *        [[ 5.]
 *        [ 6.]]]
 *        x.shape = (3, 2, 1)
 *
 *        y = split(x, axis=1, num_outputs=2) // a list of 2 arrays with shape (3, 1, 1)
 *        y = [[[ 1.]]
 *        [[ 3.]]
 *        [[ 5.]]]
 *
 *        [[[ 2.]]
 *        [[ 4.]]
 *        [[ 6.]]]
 *
 *        y[0].shape = (3, 1, 1)
 *
 *        z = split(x, axis=0, num_outputs=3) // a list of 3 arrays with shape (1, 2, 1)
 *        z = [[[ 1.]
 *        [ 2.]]]
 *
 *        [[[ 3.]
 *        [ 4.]]]
 *
 *        [[[ 5.]
 *        [ 6.]]]
 *
 *        z[0].shape = (1, 2, 1)
 *
 *        `squeeze_axis=1` removes the axis with length 1 from the shapes of the output
 *        **Note** that setting `squeeze_axis` to ``1`` removes axis with length 1 only
 *        along the `axis` which it is split.
 *        Also `squeeze_axis` can be set to true only if ``input.shape[axis] ==
 *
 *        Example::
 *
 *        z = split(x, axis=0, num_outputs=3, squeeze_axis=1) // a list of 3 arrays with
 *        z = [[ 1.]
 *        [ 2.]]
 *
 *        [[ 3.]
 *        [ 4.]]
 *
 *        [[ 5.]
 *        [ 6.]]
 *        z[0].shape = (2 ,1 )
 *
 *
 *
 *        Defined in src/operator/slice_channel.cc:L107
 * \param symbol_name name of the resulting symbol
 * \param data The input
 * \param num_outputs Number of splits. Note that this should evenly divide the length of
 * \param axis Axis along which to split.
 * \param squeeze_axis If true, Removes the axis with length 1 from the shapes of the
 *        output arrays. **Note** that setting `squeeze_axis` to ``true`` removes axis
 *        with length 1 only along the `axis` which it is split. Also `squeeze_axis` can
 * \return new symbol
 */
inline Symbol SliceChannel(const std::string& symbol_name,
                           Symbol data,
                           int num_outputs,
                           int axis = 1,
                           bool squeeze_axis = false) {
  return Operator("SliceChannel")
           .SetParam("num_outputs", num_outputs)
           .SetParam("axis", axis)
           .SetParam("squeeze_axis", squeeze_axis)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Interchanges two axes of an array.
 *
 *        Examples::
 *
 *        x = [[1, 2, 3]])
 *        swapaxes(x, 0, 1) = [[ 1],
 *        [ 2],
 *        [ 3]]
 *
 *        x = [[[ 0, 1],
 *        [ 2, 3]],
 *        [[ 4, 5],
 *        [ 6, 7]]]  // (2,2,2) array
 *
 *        swapaxes(x, 0, 2) = [[[ 0, 4],
 *        [ 2, 6]],
 *        [[ 1, 5],
 *        [ 3, 7]]]
 *
 *
 *        Defined in src/operator/swapaxis.cc:L70
 * \param symbol_name name of the resulting symbol
 * \param data Input array.
 * \param dim1 the first axis to be swapped.
 * \param dim2 the second axis to be swapped.
 * \return new symbol
 */
inline Symbol SwapAxis(const std::string& symbol_name,
                       Symbol data,
                       uint32_t dim1 = 0,
                       uint32_t dim2 = 0) {
  return Operator("SwapAxis")
           .SetParam("dim1", dim1)
           .SetParam("dim2", dim2)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Calculate cross entropy of softmax output and one-hot label.
 *
 *        - This operator computes the cross entropy in two steps:
 *        - Applies softmax function on the input array.
 *        - Computes and returns the cross entropy loss between the softmax output and
 *
 *        - The softmax function and cross entropy loss is given by:
 *
 *        - Softmax Function:
 *
 *        .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}
 *
 *        - Cross Entropy Function:
 *
 *        .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i
 *
 *        Example::
 *
 *        x = [[1, 2, 3],
 *        [11, 7, 5]]
 *
 *        label = [2, 0]
 *
 *        softmax(x) = [[0.09003057, 0.24472848, 0.66524094],
 *        [0.97962922, 0.01794253, 0.00242826]]
 *
 *        softmax_cross_entropy(data, label) = - log(0.66524084) - log(0.97962922) =
 *
 *
 *
 *        Defined in src/operator/loss_binary_op.cc:L59
 * \param symbol_name name of the resulting symbol
 * \param data Input data
 * \param label Input label
 * \return new symbol
 */
inline Symbol softmax_cross_entropy(const std::string& symbol_name,
                                    Symbol data,
                                    Symbol label) {
  return Operator("softmax_cross_entropy")
           .SetInput("data", data)
           .SetInput("label", label)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _CustomFunction(const std::string& symbol_name) {
  return Operator("_CustomFunction")
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Stub for implementing an operator implemented in native frontend language.
 * \param symbol_name name of the resulting symbol
 * \param data Input data for the custom operator.
 * \param info
 * \param need_top_grad Whether this layer needs out grad for backward. Should be false
 * \return new symbol
 */
inline Symbol _Native(const std::string& symbol_name,
                      const std::vector<Symbol>& data,
                      void* info,
                      bool need_top_grad = true) {
  return Operator("_Native")
           .SetParam("info", info)
           .SetParam("need_top_grad", need_top_grad)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Stub for implementing an operator implemented in native frontend language with
 * \param symbol_name name of the resulting symbol
 * \param data Input data for the custom operator.
 * \param info
 * \return new symbol
 */
inline Symbol _NDArray(const std::string& symbol_name,
                       const std::vector<Symbol>& data,
                       void* info) {
  return Operator("_NDArray")
           .SetParam("info", info)
(data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Apply CountSketch to input: map a d-dimension data to k-dimension data"
 *
 *        .. note:: `count_sketch` is only available on GPU.
 *
 *        Assume input data has shape (N, d), sign hash table s has shape (N, d),
 *        index hash table h has shape (N, d) and mapping dimension out_dim = k,
 *        each element in s is either +1 or -1, each element in h is random integer from
 *        Then the operator computs:
 *
 *        .. math::
 *        out[h[i]] += data[i] * s[i]
 *
 *        Example::
 *
 *        out_dim = 5
 *        x = [[1.2, 2.5, 3.4],[3.2, 5.7, 6.6]]
 *        h = [[0, 3, 4]]
 *        s = [[1, -1, 1]]
 *        mx.contrib.ndarray.count_sketch(data=x, h=h, s=s, out_dim = 5) = [[1.2, 0, 0,
 *        [3.2, 0, 0, -5.7, 6.6]]
 *
 *
 *
 *        Defined in src/operator/contrib/count_sketch.cc:L67
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the CountSketchOp.
 * \param h The index vector
 * \param s The sign vector
 * \param out_dim The output dimension.
 * \param processing_batch_size How many sketch vectors to process at one time.
 * \return new symbol
 */
inline Symbol _contrib_count_sketch(const std::string& symbol_name,
                                    Symbol data,
                                    Symbol h,
                                    Symbol s,
                                    int out_dim,
                                    int processing_batch_size = 32) {
  return Operator("_contrib_count_sketch")
           .SetParam("out_dim", out_dim)
           .SetParam("processing_batch_size", processing_batch_size)
           .SetInput("data", data)
           .SetInput("h", h)
           .SetInput("s", s)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Compute Multibox training targets
 * \param symbol_name name of the resulting symbol
 * \param anchor Generated anchor boxes.
 * \param label Object detection labels.
 * \param cls_pred Class predictions.
 * \param overlap_threshold Anchor-GT overlap threshold to be regarded as a positive
 * \param ignore_label Label for ignored anchors.
 * \param negative_mining_ratio Max negative to positive samples ratio, use -1 to disable
 * \param negative_mining_thresh Threshold used for negative mining.
 * \param minimum_negative_samples Minimum number of negative samples.
 * \param variances Variances to be encoded in box regression target.
 * \return new symbol
 */
inline Symbol _contrib_MultiBoxTarget(const std::string& symbol_name,
                                      Symbol anchor,
                                      Symbol label,
                                      Symbol cls_pred,
                                      mx_float overlap_threshold = 0.5,
                                      mx_float ignore_label = -1,
                                      mx_float negative_mining_ratio = -1,
                                      mx_float negative_mining_thresh = 0.5,
                                      int minimum_negative_samples = 0,
                                      nnvm::Tuple<mx_float> variances = {0.1,0.1,0.2,0.2}) {
  return Operator("_contrib_MultiBoxTarget")
           .SetParam("overlap_threshold", overlap_threshold)
           .SetParam("ignore_label", ignore_label)
           .SetParam("negative_mining_ratio", negative_mining_ratio)
           .SetParam("negative_mining_thresh", negative_mining_thresh)
           .SetParam("minimum_negative_samples", minimum_negative_samples)
           .SetParam("variances", variances)
           .SetInput("anchor", anchor)
           .SetInput("label", label)
           .SetInput("cls_pred", cls_pred)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs region-of-interest pooling on inputs. Resize bounding box coordinates
 *        by spatial_scale and crop input feature maps accordingly. The cropped feature
 *        maps are pooled by max pooling to a fixed size output indicated by pooled_size.
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the pooling operator, a 4D Feature maps
 * \param rois Bounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]].
 *        (x1, y1) and (x2, y2) are top left and down right corners of designated region
 *        of interest. batch_index indicates the index of corresponding image in the
 * \param spatial_scale Ratio of input feature map height (or w) to raw image height (or
 * \param output_dim fix output dim
 * \param pooled_size fix pooled size
 * \param group_size fix group size
 * \return new symbol
 */
inline Symbol _contrib_PSROIPooling(const std::string& symbol_name,
                                    Symbol data,
                                    Symbol rois,
                                    mx_float spatial_scale,
                                    int output_dim,
                                    int pooled_size,
                                    int group_size = 0) {
  return Operator("_contrib_PSROIPooling")
           .SetParam("spatial_scale", spatial_scale)
           .SetParam("output_dim", output_dim)
           .SetParam("pooled_size", pooled_size)
           .SetParam("group_size", group_size)
           .SetInput("data", data)
           .SetInput("rois", rois)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs deformable position-sensitive region-of-interest pooling on inputs.
 *        The DeformablePSROIPooling operation is described in
 *        https://arxiv.org/abs/1703.06211 .batch_size will change to the number of
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the pooling operator, a 4D Feature maps
 * \param rois Bounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]].
 *        (x1, y1) and (x2, y2) are top left and down right corners of designated region
 *        of interest. batch_index indicates the index of corresponding image in the
 * \param trans transition parameter
 * \param spatial_scale Ratio of input feature map height (or w) to raw image height (or
 * \param output_dim fix output dim
 * \param group_size fix group size
 * \param pooled_size fix pooled size
 * \param part_size fix part size
 * \param sample_per_part fix samples per part
 * \param trans_std fix transition std
 * \param no_trans Whether to disable trans parameter.
 * \return new symbol
 */
inline Symbol _contrib_DeformablePSROIPooling(const std::string& symbol_name,
                                              Symbol data,
                                              Symbol rois,
                                              Symbol trans,
                                              mx_float spatial_scale,
                                              int output_dim,
                                              int group_size,
                                              int pooled_size,
                                              int part_size = 0,
                                              int sample_per_part = 1,
                                              mx_float trans_std = 0,
                                              bool no_trans = false) {
  return Operator("_contrib_DeformablePSROIPooling")
           .SetParam("spatial_scale", spatial_scale)
           .SetParam("output_dim", output_dim)
           .SetParam("group_size", group_size)
           .SetParam("pooled_size", pooled_size)
           .SetParam("part_size", part_size)
           .SetParam("sample_per_part", sample_per_part)
           .SetParam("trans_std", trans_std)
           .SetParam("no_trans", no_trans)
           .SetInput("data", data)
           .SetInput("rois", rois)
           .SetInput("trans", trans)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Apply 1D FFT to input"
 *
 *        .. note:: `fft` is only available on GPU.
 *
 *        Currently accept 2 input data shapes: (N, d) or (N1, N2, N3, d), data can only
 *        The output data has shape: (N, 2*d) or (N1, N2, N3, 2*d). The format is:
 *
 *        Example::
 *
 *        data = np.random.normal(0,1,(3,4))
 *        out = mx.contrib.ndarray.fft(data = mx.nd.array(data,ctx = mx.gpu(0)))
 *
 *
 *
 *        Defined in src/operator/contrib/fft.cc:L56
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the FFTOp.
 * \param compute_size Maximum size of sub-batch to be forwarded at one time
 * \return new symbol
 */
inline Symbol _contrib_fft(const std::string& symbol_name,
                           Symbol data,
                           int compute_size = 128) {
  return Operator("_contrib_fft")
           .SetParam("compute_size", compute_size)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Generate prior(anchor) boxes from data, sizes and ratios.
 * \param symbol_name name of the resulting symbol
 * \param data Input data.
 * \param sizes List of sizes of generated MultiBoxPriores.
 * \param ratios List of aspect ratios of generated MultiBoxPriores.
 * \param clip Whether to clip out-of-boundary boxes.
 * \param steps Priorbox step across y and x, -1 for auto calculation.
 * \param offsets Priorbox center offsets, y and x respectively
 * \return new symbol
 */
inline Symbol _contrib_MultiBoxPrior(const std::string& symbol_name,
                                     Symbol data,
                                     nnvm::Tuple<mx_float> sizes = {1},
                                     nnvm::Tuple<mx_float> ratios = {1},
                                     bool clip = false,
                                     nnvm::Tuple<mx_float> steps = {-1,-1},
                                     nnvm::Tuple<mx_float> offsets = {0.5,0.5}) {
  return Operator("_contrib_MultiBoxPrior")
           .SetParam("sizes", sizes)
           .SetParam("ratios", ratios)
           .SetParam("clip", clip)
           .SetParam("steps", steps)
           .SetParam("offsets", offsets)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Generate region proposals via RPN
 * \param symbol_name name of the resulting symbol
 * \param cls_prob Score of how likely proposal is object.
 * \param bbox_pred BBox Predicted deltas from anchors for proposals
 * \param im_info Image size and scale.
 * \param rpn_pre_nms_top_n Number of top scoring boxes to keep before applying NMS to
 * \param rpn_post_nms_top_n Number of top scoring boxes to keep after applying NMS to
 * \param threshold NMS value, below which to suppress.
 * \param rpn_min_size Minimum height or width in proposal
 * \param scales Used to generate anchor windows by enumerating scales
 * \param ratios Used to generate anchor windows by enumerating ratios
 * \param feature_stride The size of the receptive field each unit in the convolution
 * \param output_score Add score to outputs
 * \param iou_loss Usage of IoU Loss
 * \return new symbol
 */
inline Symbol _contrib_MultiProposal(const std::string& symbol_name,
                                     Symbol cls_prob,
                                     Symbol bbox_pred,
                                     Symbol im_info,
                                     int rpn_pre_nms_top_n = 6000,
                                     int rpn_post_nms_top_n = 300,
                                     mx_float threshold = 0.699999988,
                                     int rpn_min_size = 16,
                                     nnvm::Tuple<mx_float> scales = {4,8,16,32},
                                     nnvm::Tuple<mx_float> ratios = {0.5,1,2},
                                     int feature_stride = 16,
                                     bool output_score = false,
                                     bool iou_loss = false) {
  return Operator("_contrib_MultiProposal")
           .SetParam("rpn_pre_nms_top_n", rpn_pre_nms_top_n)
           .SetParam("rpn_post_nms_top_n", rpn_post_nms_top_n)
           .SetParam("threshold", threshold)
           .SetParam("rpn_min_size", rpn_min_size)
           .SetParam("scales", scales)
           .SetParam("ratios", ratios)
           .SetParam("feature_stride", feature_stride)
           .SetParam("output_score", output_score)
           .SetParam("iou_loss", iou_loss)
           .SetInput("cls_prob", cls_prob)
           .SetInput("bbox_pred", bbox_pred)
           .SetInput("im_info", im_info)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Generate region proposals via RPN
 * \param symbol_name name of the resulting symbol
 * \param cls_prob Score of how likely proposal is object.
 * \param bbox_pred BBox Predicted deltas from anchors for proposals
 * \param im_info Image size and scale.
 * \param rpn_pre_nms_top_n Number of top scoring boxes to keep before applying NMS to
 * \param rpn_post_nms_top_n Number of top scoring boxes to keep after applying NMS to
 * \param threshold NMS value, below which to suppress.
 * \param rpn_min_size Minimum height or width in proposal
 * \param scales Used to generate anchor windows by enumerating scales
 * \param ratios Used to generate anchor windows by enumerating ratios
 * \param feature_stride The size of the receptive field each unit in the convolution
 * \param output_score Add score to outputs
 * \param iou_loss Usage of IoU Loss
 * \return new symbol
 */
inline Symbol _contrib_Proposal(const std::string& symbol_name,
                                Symbol cls_prob,
                                Symbol bbox_pred,
                                Symbol im_info,
                                int rpn_pre_nms_top_n = 6000,
                                int rpn_post_nms_top_n = 300,
                                mx_float threshold = 0.699999988,
                                int rpn_min_size = 16,
                                nnvm::Tuple<mx_float> scales = {4,8,16,32},
                                nnvm::Tuple<mx_float> ratios = {0.5,1,2},
                                int feature_stride = 16,
                                bool output_score = false,
                                bool iou_loss = false) {
  return Operator("_contrib_Proposal")
           .SetParam("rpn_pre_nms_top_n", rpn_pre_nms_top_n)
           .SetParam("rpn_post_nms_top_n", rpn_post_nms_top_n)
           .SetParam("threshold", threshold)
           .SetParam("rpn_min_size", rpn_min_size)
           .SetParam("scales", scales)
           .SetParam("ratios", ratios)
           .SetParam("feature_stride", feature_stride)
           .SetParam("output_score", output_score)
           .SetParam("iou_loss", iou_loss)
           .SetInput("cls_prob", cls_prob)
           .SetInput("bbox_pred", bbox_pred)
           .SetInput("im_info", im_info)
           .CreateSymbol(symbol_name);
}

/*! \brief Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 */
enum class _contrib_DeformableConvolutionLayout {
  kNone = 0,
  kNCDHW = 1,
  kNCHW = 2,
  kNCW = 3
};

/*!
 * \brief Compute 2-D deformable convolution on 4-D input.
 *
 *        The deformable convolution operation is described in
 *
 *        For 2-D deformable convolution, the shapes are
 *
 *        - **data**: *(batch_size, channel, height, width)*
 *        - **offset**: *(batch_size, num_deformable_group * kernel[0] * kernel[1] * 2,
 *        - **weight**: *(num_filter, channel, kernel[0], kernel[1])*
 *        - **bias**: *(num_filter,)*
 *        - **out**: *(batch_size, num_filter, out_height, out_width)*.
 *
 *        Define::
 *
 *        f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1
 *
 *        then we have::
 *
 *        out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
 *        out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])
 *
 *        If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
 *
 *        The default data ``layout`` is *NCHW*, namely *(batch_size, channle, height,
 *        width)*.
 *
 *        If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
 *        evenly into *g* parts along the channel axis, and also evenly split ``weight``
 *        along the first dimension. Next compute the convolution on the *i*-th part of
 *        the data with the *i*-th weight part. The output is obtained by concating all
 *        the *g* results.
 *
 *        If ``num_deformable_group`` is larger than 1, denoted by *dg*, then split the
 *        input ``offset`` evenly into *dg* parts along the channel axis, and also evenly
 *        split ``data`` into *dg* parts along the channel axis. Next compute the
 *        deformable convolution, apply the *i*-th part of the offset on the *i*-th part
 *        of the data.
 *
 *
 *        Both ``weight`` and ``bias`` are learnable parameters.
 *
 *
 *
 *
 *        Defined in src/operator/contrib/deformable_convolution.cc:L100
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the DeformableConvolutionOp.
 * \param offset Input offset to the DeformableConvolutionOp.
 * \param weight Weight matrix.
 * \param bias Bias parameter.
 * \param kernel Convolution kernel size: (h, w) or (d, h, w)
 * \param num_filter Convolution filter(channel) number
 * \param stride Convolution stride: (h, w) or (d, h, w). Defaults to 1 for each
 * \param dilate Convolution dilate: (h, w) or (d, h, w). Defaults to 1 for each
 * \param pad Zero pad for convolution: (h, w) or (d, h, w). Defaults to no padding.
 * \param num_group Number of group partitions.
 * \param num_deformable_group Number of deformable group partitions.
 * \param workspace Maximum temperal workspace allowed for convolution (MB).
 * \param no_bias Whether to disable bias parameter.
 * \param layout Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 * \return new symbol
 */
inline Symbol _contrib_DeformableConvolution(const std::string& symbol_name,
                                             Symbol data,
                                             Symbol offset,
                                             Symbol weight,
                                             Symbol bias,
                                             Shape kernel,
                                             int num_filter,
                                             Shape stride = {},
                                             Shape dilate = {},
                                             Shape pad = {},
                                             int num_group = 1,
                                             int num_deformable_group = 1,
                                             uint64_t workspace = 1024,
                                             bool no_bias = false,
                                             _contrib_DeformableConvolutionLayout layout = _contrib_DeformableConvolutionLayout::kNone) {
  static const char *_contrib_DeformableConvolutionLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW"
  };
  return Operator("_contrib_DeformableConvolution")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("num_group", num_group)
           .SetParam("num_deformable_group", num_deformable_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("layout", _contrib_DeformableConvolutionLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("offset", offset)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Apply 1D ifft to input"
 *
 *        .. note:: `ifft` is only available on GPU.
 *
 *        Currently accept 2 input data shapes: (N, d) or (N1, N2, N3, d). Data is in
 *        Last dimension must be an even number.
 *        The output data has shape: (N, d/2) or (N1, N2, N3, d/2). It is only the real
 *
 *        Example::
 *
 *        data = np.random.normal(0,1,(3,4))
 *        out = mx.contrib.ndarray.ifft(data = mx.nd.array(data,ctx = mx.gpu(0)))
 *
 *
 *
 *        Defined in src/operator/contrib/ifft.cc:L58
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the IFFTOp.
 * \param compute_size Maximum size of sub-batch to be forwarded at one time
 * \return new symbol
 */
inline Symbol _contrib_ifft(const std::string& symbol_name,
                            Symbol data,
                            int compute_size = 128) {
  return Operator("_contrib_ifft")
           .SetParam("compute_size", compute_size)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Convert multibox detection predictions.
 * \param symbol_name name of the resulting symbol
 * \param cls_prob Class probabilities.
 * \param loc_pred Location regression predictions.
 * \param anchor Multibox prior anchor boxes
 * \param clip Clip out-of-boundary boxes.
 * \param threshold Threshold to be a positive prediction.
 * \param background_id Background id.
 * \param nms_threshold Non-maximum suppression threshold.
 * \param force_suppress Suppress all detections regardless of class_id.
 * \param variances Variances to be decoded from box regression output.
 * \param nms_topk Keep maximum top k detections before nms, -1 for no limit.
 * \return new symbol
 */
inline Symbol _contrib_MultiBoxDetection(const std::string& symbol_name,
                                         Symbol cls_prob,
                                         Symbol loc_pred,
                                         Symbol anchor,
                                         bool clip = true,
                                         mx_float threshold = 0.00999999978,
                                         int background_id = 0,
                                         mx_float nms_threshold = 0.5,
                                         bool force_suppress = false,
                                         nnvm::Tuple<mx_float> variances = {0.1,0.1,0.2,0.2},
                                         int nms_topk = -1) {
  return Operator("_contrib_MultiBoxDetection")
           .SetParam("clip", clip)
           .SetParam("threshold", threshold)
           .SetParam("background_id", background_id)
           .SetParam("nms_threshold", nms_threshold)
           .SetParam("force_suppress", force_suppress)
           .SetParam("variances", variances)
           .SetParam("nms_topk", nms_topk)
           .SetInput("cls_prob", cls_prob)
           .SetInput("loc_pred", loc_pred)
           .SetInput("anchor", anchor)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Applies instance normalization to the n-dimensional input array.
 *
 *        This operator takes an n-dimensional input array where (n>2) and normalizes
 *        the input using the following formula:
 *
 *        .. math::
 *
 *        out = \frac{x - mean[data]}{ \sqrt{Var[data]} + \epsilon} * gamma + beta
 *
 *        This layer is similar to batch normalization layer (`BatchNorm`)
 *        with two differences: first, the normalization is
 *        carried out per example (instance), not over a batch. Second, the
 *        same normalization is applied both at test and train time. This
 *        operation is also known as `contrast normalization`.
 *
 *        If the input data is of shape [batch, channel, spacial_dim1, spacial_dim2, ...],
 *        `gamma` and `beta` parameters must be vectors of shape [channel].
 *
 *        This implementation is based on paper:
 *
 *        .. [1] Instance Normalization: The Missing Ingredient for Fast Stylization,
 *        D. Ulyanov, A. Vedaldi, V. Lempitsky, 2016 (arXiv:1607.08022v2).
 *
 *        Examples::
 *
 *        // Input of shape (2,1,2)
 *        x = [[[ 1.1,  2.2]],
 *        [[ 3.3,  4.4]]]
 *
 *        // gamma parameter of length 1
 *        gamma = [1.5]
 *
 *        // beta parameter of length 1
 *        beta = [0.5]
 *
 *        // Instance normalization is calculated with the above formula
 *        InstanceNorm(x,gamma,beta) = [[[-0.997527  ,  1.99752665]],
 *        [[-0.99752653,  1.99752724]]]
 *
 *
 *
 *        Defined in src/operator/instance_norm.cc:L95
 * \param symbol_name name of the resulting symbol
 * \param data An n-dimensional input array (n > 2) of the form [batch, channel,
 * \param gamma A vector of length 'channel', which multiplies the normalized input.
 * \param beta A vector of length 'channel', which is added to the product of the
 * \param eps An `epsilon` parameter to prevent division by 0.
 * \return new symbol
 */
inline Symbol InstanceNorm(const std::string& symbol_name,
                           Symbol data,
                           Symbol gamma,
                           Symbol beta,
                           mx_float eps = 0.00100000005) {
  return Operator("InstanceNorm")
           .SetParam("eps", eps)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .SetInput("beta", beta)
           .CreateSymbol(symbol_name);
}

/*! \brief The type of transformation. For `affine`, input data should be an affine matrix
 *        of size (batch, 6). For `warp`, input data should be an optical flow of size
 */
enum class GridGeneratorTransformType {
  kAffine = 0,
  kWarp = 1
};

/*!
 * \brief Generates 2D sampling grid for bilinear sampling.
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the function.
 * \param transform_type The type of transformation. For `affine`, input data should be
 *        an affine matrix of size (batch, 6). For `warp`, input data should be an
 * \param target_shape Specifies the output shape (H, W). This is required if
 *        transformation type is `affine`. If transformation type is `warp`, this
 * \return new symbol
 */
inline Symbol GridGenerator(const std::string& symbol_name,
                            Symbol data,
                            GridGeneratorTransformType transform_type,
                            Shape target_shape = {0,0}) {
  static const char *GridGeneratorTransformTypeValues[] = {
    "affine",
    "warp"
  };
  return Operator("GridGenerator")
           .SetParam("transform_type", GridGeneratorTransformTypeValues[int(transform_type)])
           .SetParam("target_shape", target_shape)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*! \brief Whether to pick convolution algo by running performance test.
 *        Leads to higher startup time but may give faster speed. Options are:
 *        'off': no tuning
 *        'limited_workspace': run test and pick the fastest algorithm that doesn't
 *        'fastest': pick the fastest algorithm and ignore workspace limit.
 *        If set to None (default), behavior is determined by environment
 *        variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off,
 *        1 for limited workspace (default), 2 for fastest.
 */
enum class Convolution_v1CudnnTune {
  kNone = 0,
  kFastest = 1,
  kLimited_workspace = 2,
  kOff = 3
};

/*! \brief Set layout for input, output and weight. Empty for
 *        default layout: NCHW for 2d and NCDHW for 3d.
 */
enum class Convolution_v1Layout {
  kNone = 0,
  kNCDHW = 1,
  kNCHW = 2,
  kNDHWC = 3,
  kNHWC = 4
};

/*!
 * \brief This operator is DEPRECATED. Apply convolution to input then add a bias.
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the ConvolutionV1Op.
 * \param weight Weight matrix.
 * \param bias Bias parameter.
 * \param kernel convolution kernel size: (h, w) or (d, h, w)
 * \param num_filter convolution filter(channel) number
 * \param stride convolution stride: (h, w) or (d, h, w)
 * \param dilate convolution dilate: (h, w) or (d, h, w)
 * \param pad pad for convolution: (h, w) or (d, h, w)
 * \param num_group Number of group partitions. Equivalent to slicing input into num_group
 *        partitions, apply convolution on each, then concatenate the results
 * \param workspace Maximum temporary workspace allowed for convolution (MB).This
 *        parameter determines the effective batch size of the convolution kernel, which
 *        may be smaller than the given batch size. Also, the workspace will be
 * \param no_bias Whether to disable bias parameter.
 * \param cudnn_tune Whether to pick convolution algo by running performance test.
 *        Leads to higher startup time but may give faster speed. Options are:
 *        'off': no tuning
 *        'limited_workspace': run test and pick the fastest algorithm that doesn't
 *        'fastest': pick the fastest algorithm and ignore workspace limit.
 *        If set to None (default), behavior is determined by environment
 *        variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off,
 *        1 for limited workspace (default), 2 for fastest.
 * \param cudnn_off Turn off cudnn for this layer.
 * \param layout Set layout for input, output and weight. Empty for
 *        default layout: NCHW for 2d and NCDHW for 3d.
 * \return new symbol
 */
inline Symbol Convolution_v1(const std::string& symbol_name,
                             Symbol data,
                             Symbol weight,
                             Symbol bias,
                             Shape kernel,
                             uint32_t num_filter,
                             Shape stride = {},
                             Shape dilate = {},
                             Shape pad = {},
                             uint32_t num_group = 1,
                             uint64_t workspace = 1024,
                             bool no_bias = false,
                             Convolution_v1CudnnTune cudnn_tune = Convolution_v1CudnnTune::kNone,
                             bool cudnn_off = false,
                             Convolution_v1Layout layout = Convolution_v1Layout::kNone) {
  static const char *Convolution_v1CudnnTuneValues[] = {
    "None",
    "fastest",
    "limited_workspace",
    "off"
  };
  static const char *Convolution_v1LayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NDHWC",
    "NHWC"
  };
  return Operator("Convolution_v1")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("num_group", num_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("cudnn_tune", Convolution_v1CudnnTuneValues[int(cudnn_tune)])
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("layout", Convolution_v1LayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 *
 *        .. note:: `Crop` is deprecated. Use `slice` instead.
 *
 *        Crop the 2nd and 3rd dim of input data, with the corresponding size of h_w or
 *        with width and height of the second input symbol, i.e., with one input, we need
 *        specify the crop height and width, otherwise the second input symbol's size
 *
 *
 *        Defined in src/operator/crop.cc:L50
 * \param symbol_name name of the resulting symbol
 * \param data Tensor or List of Tensors, the second input will be used as crop_like
 * \param num_args Number of inputs for crop, if equals one, then we will use the h_wfor
 *        crop height and width, else if equals two, then we will use the heightand width
 * \param offset crop offset coordinate: (y, x)
 * \param h_w crop height and width: (h, w)
 * \param center_crop If set to true, then it will use be the center_crop,or it will crop
 * \return new symbol
 */
inline Symbol Crop(const std::string& symbol_name,
                   const std::vector<Symbol>& data,
                   int num_args,
                   Shape offset = {0,0},
                   Shape h_w = {0,0},
                   bool center_crop = false) {
  return Operator("Crop")
           .SetParam("num_args", num_args)
           .SetParam("offset", offset)
           .SetParam("h_w", h_w)
           .SetParam("center_crop", center_crop)
(data)
           .CreateSymbol(symbol_name);
}

/*! \brief transformation type
 */
enum class SpatialTransformerTransformType {
  kAffine = 0
};

/*! \brief sampling type
 */
enum class SpatialTransformerSamplerType {
  kBilinear = 0
};

/*!
 * \brief Applies a spatial transformer to input feature map.
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the SpatialTransformerOp.
 * \param loc localisation net, the output dim should be 6 when transform_type is affine.
 * \param transform_type transformation type
 * \param sampler_type sampling type
 * \param target_shape output shape(h, w) of spatial transformer: (y, x)
 * \param cudnn_off whether to turn cudnn off
 * \return new symbol
 */
inline Symbol SpatialTransformer(const std::string& symbol_name,
                                 Symbol data,
                                 Symbol loc,
                                 SpatialTransformerTransformType transform_type,
                                 SpatialTransformerSamplerType sampler_type,
                                 Shape target_shape = {0,0},
                                 dmlc::optional<bool> cudnn_off = dmlc::optional<bool>()) {
  static const char *SpatialTransformerTransformTypeValues[] = {
    "affine"
  };
  static const char *SpatialTransformerSamplerTypeValues[] = {
    "bilinear"
  };
  return Operator("SpatialTransformer")
           .SetParam("transform_type", SpatialTransformerTransformTypeValues[int(transform_type)])
           .SetParam("sampler_type", SpatialTransformerSamplerTypeValues[int(sampler_type)])
           .SetParam("target_shape", target_shape)
           .SetParam("cudnn_off", cudnn_off)
           .SetInput("data", data)
           .SetInput("loc", loc)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Performs region of interest(ROI) pooling on the input array.
 *
 *        ROI pooling is a variant of a max pooling layer, in which the output size is
 *        region of interest is a parameter. Its purpose is to perform max pooling on the
 *        of non-uniform sizes to obtain fixed-size feature maps. ROI pooling is a
 *        layer mostly used in training a `Fast R-CNN` network for object detection.
 *
 *        This operator takes a 4D feature map as an input array and region proposals as
 *        then it pools over sub-regions of input and produces a fixed-sized output array
 *        regardless of the ROI size.
 *
 *        To crop the feature map accordingly, you can resize the bounding box coordinates
 *        by changing the parameters `rois` and `spatial_scale`.
 *
 *        The cropped feature maps are pooled by standard max pooling operation to a
 *        indicated by a `pooled_size` parameter. batch_size will change to the number of
 *        bounding boxes after `ROIPooling`.
 *
 *        The size of each region of interest doesn't have to be perfectly divisible by
 *        the number of pooling sections(`pooled_size`).
 *
 *        Example::
 *
 *        x = [[[[  0.,   1.,   2.,   3.,   4.,   5.],
 *        [  6.,   7.,   8.,   9.,  10.,  11.],
 *        [ 12.,  13.,  14.,  15.,  16.,  17.],
 *        [ 18.,  19.,  20.,  21.,  22.,  23.],
 *        [ 24.,  25.,  26.,  27.,  28.,  29.],
 *        [ 30.,  31.,  32.,  33.,  34.,  35.],
 *        [ 36.,  37.,  38.,  39.,  40.,  41.],
 *        [ 42.,  43.,  44.,  45.,  46.,  47.]]]]
 *
 *        // region of interest i.e. bounding box coordinates.
 *        y = [[0,0,0,4,4]]
 *
 *        // returns array of shape (2,2) according to the given roi with max pooling.
 *        ROIPooling(x, y, (2,2), 1.0) = [[[[ 14.,  16.],
 *        [ 26.,  28.]]]]
 *
 *        // region of interest is changed due to the change in `spacial_scale` parameter.
 *        ROIPooling(x, y, (2,2), 0.7) = [[[[  7.,   9.],
 *        [ 19.,  21.]]]]
 *
 *
 *
 *        Defined in src/operator/roi_pooling.cc:L295
 * \param symbol_name name of the resulting symbol
 * \param data The input array to the pooling operator,  a 4D Feature maps
 * \param rois Bounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]],
 *        where (x1, y1) and (x2, y2) are top left and bottom right corners of designated
 *        region of interest. `batch_index` indicates the index of corresponding image in
 * \param pooled_size ROI pooling output shape (h,w)
 * \param spatial_scale Ratio of input feature map height (or w) to raw image height (or
 * \return new symbol
 */
inline Symbol ROIPooling(const std::string& symbol_name,
                         Symbol data,
                         Symbol rois,
                         Shape pooled_size,
                         mx_float spatial_scale) {
  return Operator("ROIPooling")
           .SetParam("pooled_size", pooled_size)
           .SetParam("spatial_scale", spatial_scale)
           .SetInput("data", data)
           .SetInput("rois", rois)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Special op to copy data cross device
 * \param symbol_name name of the resulting symbol
 * \return new symbol
 */
inline Symbol _CrossDeviceCopy(const std::string& symbol_name) {
  return Operator("_CrossDeviceCopy")
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Applies bilinear sampling to input feature map.
 *
 *        Bilinear Sampling is the key of  [NIPS2015] \"Spatial Transformer Networks\".
 *        except that the operator has the backward pass.
 *
 *        Given :math:`data` and :math:`grid`, then the output is computed by
 *
 *        .. math::
 *        x_{src} = grid[batch, 0, y_{dst}, x_{dst}] \\
 *        y_{src} = grid[batch, 1, y_{dst}, x_{dst}] \\
 *        output[batch, channel, y_{dst}, x_{dst}] = G(data[batch, channel, y_{src},
 *
 *        :math:`x_{dst}`, :math:`y_{dst}` enumerate all spatial locations in
 *        The out-boundary points will be padded with zeros.The shape of the output will
 *
 *        The operator assumes that :math:`data` has 'NCHW' layout and :math:`grid` has
 *
 *        BilinearSampler often cooperates with GridGenerator which generates sampling
 *        GridGenerator supports two kinds of transformation: ``affine`` and ``warp``.
 *        If users want to design a CustomOp to manipulate :math:`grid`, please firstly
 *
 *        Example 1::
 *
 *        ## Zoom out data two times
 *        data = array([[[[1, 4, 3, 6],
 *        [1, 8, 8, 9],
 *        [0, 4, 1, 5],
 *        [1, 0, 1, 3]]]])
 *
 *        affine_matrix = array([[2, 0, 0],
 *        [0, 2, 0]])
 *
 *        affine_matrix = reshape(affine_matrix, shape=(1, 6))
 *
 *        grid = GridGenerator(data=affine_matrix, transform_type='affine',
 *
 *        out = BilinearSampler(data, grid)
 *
 *        out
 *        [[[[ 0,   0,     0,   0],
 *        [ 0,   3.5,   6.5, 0],
 *        [ 0,   1.25,  2.5, 0],
 *        [ 0,   0,     0,   0]]]
 *
 *
 *        Example 2::
 *
 *        ## shift data horizontally by -1 pixel
 *
 *        data = array([[[[1, 4, 3, 6],
 *        [1, 8, 8, 9],
 *        [0, 4, 1, 5],
 *        [1, 0, 1, 3]]]])
 *
 *        warp_maxtrix = array([[[[1, 1, 1, 1],
 *        [1, 1, 1, 1],
 *        [1, 1, 1, 1],
 *        [1, 1, 1, 1]],
 *        [[0, 0, 0, 0],
 *        [0, 0, 0, 0],
 *        [0, 0, 0, 0],
 *        [0, 0, 0, 0]]]])
 *
 *        grid = GridGenerator(data=warp_matrix, transform_type='warp')
 *        out = BilinearSampler(data, grid)
 *
 *        out
 *        [[[[ 4,  3,  6,  0],
 *        [ 8,  8,  9,  0],
 *        [ 4,  1,  5,  0],
 *        [ 0,  1,  3,  0]]]
 *
 *
 *        Defined in src/operator/bilinear_sampler.cc:L256
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the BilinearsamplerOp.
 * \param grid Input grid to the BilinearsamplerOp.grid has two channels: x_src, y_src
 * \param cudnn_off whether to turn cudnn off
 * \return new symbol
 */
inline Symbol BilinearSampler(const std::string& symbol_name,
                              Symbol data,
                              Symbol grid,
                              dmlc::optional<bool> cudnn_off = dmlc::optional<bool>()) {
  return Operator("BilinearSampler")
           .SetParam("cudnn_off", cudnn_off)
           .SetInput("data", data)
           .SetInput("grid", grid)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Takes the last element of a sequence.
 *
 *        This function takes an n-dimensional input array of the form
 *        [max_sequence_length, batch_size, other_feature_dims] and returns a
 *        of the form [batch_size, other_feature_dims].
 *
 *        Parameter `sequence_length` is used to handle variable-length sequences.
 *        an input array of positive ints of dimension [batch_size]. To use this
 *        set `use_sequence_length` to `True`, otherwise each example in the batch is
 *        to have the max sequence length.
 *
 *        .. note:: Alternatively, you can also use `take` operator.
 *
 *        Example::
 *
 *        x = [[[  1.,   2.,   3.],
 *        [  4.,   5.,   6.],
 *        [  7.,   8.,   9.]],
 *
 *        [[ 10.,   11.,   12.],
 *        [ 13.,   14.,   15.],
 *        [ 16.,   17.,   18.]],
 *
 *        [[  19.,   20.,   21.],
 *        [  22.,   23.,   24.],
 *        [  25.,   26.,   27.]]]
 *
 *        // returns last sequence when sequence_length parameter is not used
 *        SequenceLast(x) = [[  19.,   20.,   21.],
 *        [  22.,   23.,   24.],
 *        [  25.,   26.,   27.]]
 *
 *        // sequence_length is used
 *        SequenceLast(x, sequence_length=[1,1,1], use_sequence_length=True) =
 *        [[  1.,   2.,   3.],
 *        [  4.,   5.,   6.],
 *        [  7.,   8.,   9.]]
 *
 *        // sequence_length is used
 *        SequenceLast(x, sequence_length=[1,2,3], use_sequence_length=True) =
 *        [[  1.,    2.,   3.],
 *        [  13.,  14.,  15.],
 *        [  25.,  26.,  27.]]
 *
 *
 *
 *        Defined in src/operator/sequence_last.cc:L100
 * \param symbol_name name of the resulting symbol
 * \param data n-dimensional input array of the form [max_sequence_length, batch_size,
 * \param sequence_length vector of sequence lengths of the form [batch_size]
 * \param use_sequence_length If set to true, this layer takes in an extra input
 * \param axis The sequence axis. Only values of 0 and 1 are currently supported.
 * \return new symbol
 */
inline Symbol SequenceLast(const std::string& symbol_name,
                           Symbol data,
                           Symbol sequence_length,
                           bool use_sequence_length = false,
                           int axis = 0) {
  return Operator("SequenceLast")
           .SetParam("use_sequence_length", use_sequence_length)
           .SetParam("axis", axis)
           .SetInput("data", data)
           .SetInput("sequence_length", sequence_length)
           .CreateSymbol(symbol_name);
}

/*! \brief Pooling type to be applied.
 */
enum class Pooling_v1PoolType {
  kAvg = 0,
  kMax = 1,
  kSum = 2
};

/*! \brief Pooling convention to be applied.
 */
enum class Pooling_v1PoolingConvention {
  kFull = 0,
  kValid = 1
};

/*!
 * \brief This operator is DEPRECATED.
 *        Perform pooling on the input.
 *
 *        The shapes for 2-D pooling is
 *
 *        - **data**: *(batch_size, channel, height, width)*
 *        - **out**: *(batch_size, num_filter, out_height, out_width)*, with::
 *
 *        out_height = f(height, kernel[0], pad[0], stride[0])
 *        out_width = f(width, kernel[1], pad[1], stride[1])
 *
 *        The definition of *f* depends on ``pooling_convention``, which has two options:
 *
 *        - **valid** (default)::
 *
 *        f(x, k, p, s) = floor((x+2*p-k)/s)+1
 *
 *        - **full**, which is compatible with Caffe::
 *
 *        f(x, k, p, s) = ceil((x+2*p-k)/s)+1
 *
 *        But ``global_pool`` is set to be true, then do a global pooling, namely reset
 *        ``kernel=(height, width)``.
 *
 *        Three pooling options are supported by ``pool_type``:
 *
 *        - **avg**: average pooling
 *        - **max**: max pooling
 *        - **sum**: sum pooling
 *
 *        1-D pooling is special case of 2-D pooling with *weight=1* and
 *        *kernel[1]=1*.
 *
 *        For 3-D pooling, an additional *depth* dimension is added before
 *        *height*. Namely the input data will have shape *(batch_size, channel, depth,
 *        height, width)*.
 *
 *
 *
 *        Defined in src/operator/pooling_v1.cc:L104
 * \param symbol_name name of the resulting symbol
 * \param data Input data to the pooling operator.
 * \param kernel pooling kernel size: (y, x) or (d, y, x)
 * \param pool_type Pooling type to be applied.
 * \param global_pool Ignore kernel size, do global pooling based on current input
 * \param pooling_convention Pooling convention to be applied.
 * \param stride stride: for pooling (y, x) or (d, y, x)
 * \param pad pad for pooling: (y, x) or (d, y, x)
 * \return new symbol
 */
inline Symbol Pooling_v1(const std::string& symbol_name,
                         Symbol data,
                         Shape kernel = {},
                         Pooling_v1PoolType pool_type = Pooling_v1PoolType::kMax,
                         bool global_pool = false,
                         Pooling_v1PoolingConvention pooling_convention = Pooling_v1PoolingConvention::kValid,
                         Shape stride = {},
                         Shape pad = {}) {
  static const char *Pooling_v1PoolTypeValues[] = {
    "avg",
    "max",
    "sum"
  };
  static const char *Pooling_v1PoolingConventionValues[] = {
    "full",
    "valid"
  };
  return Operator("Pooling_v1")
           .SetParam("kernel", kernel)
           .SetParam("pool_type", Pooling_v1PoolTypeValues[int(pool_type)])
           .SetParam("global_pool", global_pool)
           .SetParam("pooling_convention", Pooling_v1PoolingConventionValues[int(pooling_convention)])
           .SetParam("stride", stride)
           .SetParam("pad", pad)
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Reverses the elements of each sequence.
 *
 *        This function takes an n-dimensional input array of the form
 *        and returns an array of the same shape.
 *
 *        Parameter `sequence_length` is used to handle variable-length sequences.
 *        `sequence_length` should be an input array of positive ints of dimension
 *        To use this parameter, set `use_sequence_length` to `True`,
 *        otherwise each example in the batch is assumed to have the max sequence length.
 *
 *        Example::
 *
 *        x = [[[  1.,   2.,   3.],
 *        [  4.,   5.,   6.]],
 *
 *        [[  7.,   8.,   9.],
 *        [ 10.,  11.,  12.]],
 *
 *        [[ 13.,  14.,   15.],
 *        [ 16.,  17.,   18.]]]
 *
 *        // Batch 1
 *        B1 = [[  1.,   2.,   3.],
 *        [  7.,   8.,   9.],
 *        [ 13.,  14.,  15.]]
 *
 *        // Batch 2
 *        B2 = [[  4.,   5.,   6.],
 *        [ 10.,  11.,  12.],
 *        [ 16.,  17.,  18.]]
 *
 *        // returns reverse sequence when sequence_length parameter is not used
 *        SequenceReverse(x) = [[[ 13.,  14.,   15.],
 *        [ 16.,  17.,   18.]],
 *
 *        [[  7.,   8.,   9.],
 *        [ 10.,  11.,  12.]],
 *
 *        [[  1.,   2.,   3.],
 *        [  4.,   5.,   6.]]]
 *
 *        // sequence_length [2,2] means 2 rows of
 *        // both batch B1 and B2 will be reversed.
 *        SequenceReverse(x, sequence_length=[2,2], use_sequence_length=True) =
 *        [[[  7.,   8.,   9.],
 *        [ 10.,  11.,  12.]],
 *
 *        [[  1.,   2.,   3.],
 *        [  4.,   5.,   6.]],
 *
 *        [[ 13.,  14.,   15.],
 *        [ 16.,  17.,   18.]]]
 *
 *        // sequence_length [2,3] means 2 of batch B2 and 3 of batch B3
 *        // will be reversed.
 *        SequenceReverse(x, sequence_length=[2,3], use_sequence_length=True) =
 *        [[[  7.,   8.,   9.],
 *        [ 16.,  17.,  18.]],
 *
 *        [[  1.,   2.,   3.],
 *        [ 10.,  11.,  12.]],
 *
 *        [[ 13.,  14,   15.],
 *        [  4.,   5.,   6.]]]
 *
 *
 *
 *        Defined in src/operator/sequence_reverse.cc:L122
 * \param symbol_name name of the resulting symbol
 * \param data n-dimensional input array of the form [max_sequence_length, batch_size,
 * \param sequence_length vector of sequence lengths of the form [batch_size]
 * \param use_sequence_length If set to true, this layer takes in an extra input
 * \param axis The sequence axis. Only 0 is currently supported.
 * \return new symbol
 */
inline Symbol SequenceReverse(const std::string& symbol_name,
                              Symbol data,
                              Symbol sequence_length,
                              bool use_sequence_length = false,
                              int axis = 0) {
  return Operator("SequenceReverse")
           .SetParam("use_sequence_length", use_sequence_length)
           .SetParam("axis", axis)
           .SetInput("data", data)
           .SetInput("sequence_length", sequence_length)
           .CreateSymbol(symbol_name);
}

/*! \brief If this is set to null, the output gradient will not be normalized. If this is
 *        set to batch, the output gradient will be divided by the batch size. If this is
 *        set to valid, the output gradient will be divided by the number of valid input
 */
enum class MakeLossNormalization {
  kBatch = 0,
  kNull = 1,
  kValid = 2
};

/*!
 * \brief Make your own loss function in network construction.
 *
 *        This operator accepts a customized loss function symbol as a terminal loss and
 *        the symbol should be an operator with no backward dependency.
 *        The output of this function is the gradient of loss with respect to the input
 *
 *        For example, if you are a making a cross entropy loss function. Assume ``out``
 *        predicted output and ``label`` is the true label, then the cross entropy can be
 *
 *        cross_entropy = label * log(out) + (1 - label) * log(1 - out)
 *        loss = MakeLoss(cross_entropy)
 *
 *        We will need to use ``MakeLoss`` when we are creating our own loss function or
 *        combine multiple loss functions. Also we may want to stop some variables'
 *        from backpropagation. See more detail in ``BlockGrad`` or ``stop_gradient``.
 *
 *        In addition, we can give a scale to the loss by setting ``grad_scale``,
 *        so that the gradient of the loss will be rescaled in the backpropagation.
 *
 *        .. note:: This operator should be used as a Symbol instead of NDArray.
 *
 *
 *
 *        Defined in src/operator/make_loss.cc:L71
 * \param symbol_name name of the resulting symbol
 * \param data Input array.
 * \param grad_scale Gradient scale as a supplement to unary and binary operators
 * \param valid_thresh clip each element in the array to 0 when it is less than
 * \param normalization If this is set to null, the output gradient will not be
 *        normalized. If this is set to batch, the output gradient will be divided by the
 *        batch size. If this is set to valid, the output gradient will be divided by the
 * \return new symbol
 */
inline Symbol MakeLoss(const std::string& symbol_name,
                       Symbol data,
                       mx_float grad_scale = 1,
                       mx_float valid_thresh = 0,
                       MakeLossNormalization normalization = MakeLossNormalization::kNull) {
  static const char *MakeLossNormalizationValues[] = {
    "batch",
    "null",
    "valid"
  };
  return Operator("MakeLoss")
           .SetParam("grad_scale", grad_scale)
           .SetParam("valid_thresh", valid_thresh)
           .SetParam("normalization", MakeLossNormalizationValues[int(normalization)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Computes support vector machine based transformation of the input.
 *
 *        This tutorial demonstrates using SVM as output layer for classification instead
 *        https://github.com/dmlc/mxnet/tree/master/example/svm_mnist.
 *
 *
 * \param symbol_name name of the resulting symbol
 * \param data Input data for SVM transformation.
 * \param label Class label for the input data.
 * \param margin The loss function penalizes outputs that lie outside this margin.
 * \param regularization_coefficient Regularization parameter for the SVM. This balances
 * \param use_linear Whether to use L1-SVM objective. L2-SVM objective is used by default.
 * \return new symbol
 */
inline Symbol SVMOutput(const std::string& symbol_name,
                        Symbol data,
                        Symbol label,
                        mx_float margin = 1,
                        mx_float regularization_coefficient = 1,
                        bool use_linear = false) {
  return Operator("SVMOutput")
           .SetParam("margin", margin)
           .SetParam("regularization_coefficient", regularization_coefficient)
           .SetParam("use_linear", use_linear)
           .SetInput("data", data)
           .SetInput("label", label)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Applies correlation to inputs.
 *
 *        The correlation layer performs multiplicative patch comparisons between two
 *
 *        Given two multi-channel feature maps :math:`f_{1}, f_{2}`, with :math:`w`,
 *        the correlation layer lets the network compare each patch from :math:`f_{1}`
 *
 *        For now we consider only a single comparison of two patches. The 'correlation'
 *        :math:`x_{2}` in the second map is then defined as:
 *
 *        .. math::
 *
 *        c(x_{1}, x_{2}) = \sum_{o \in [-k,k] \times [-k,k]} <f_{1}(x_{1} + o),
 *
 *        for a square patch of size :math:`K:=2k+1`.
 *
 *        Note that the equation above is identical to one step of a convolution in
 *        neural networks, but instead of convolving data with a filter, it convolves
 *        data. For this reason, it has no training weights.
 *
 *        Computing :math:`c(x_{1}, x_{2})` involves :math:`c * K^{2}` multiplications.
 *
 *        Given a maximum displacement :math:`d`, for each location :math:`x_{1}` it
 *        computes correlations :math:`c(x_{1}, x_{2})` only in a neighborhood of size
 *        by limiting the range of :math:`x_{2}`. We use strides :math:`s_{1}, s_{2}`, to
 *        quantize :math:`x_{1}` globally and to quantize :math:`x_{2}` within the
 *        centered around :math:`x_{1}`.
 *
 *        The final output is defined by the following expression:
 *
 *        .. math::
 *        out[n, q, i, j] = c(x_{i, j}, x_{q})
 *
 *        where :math:`i` and :math:`j` enumerate spatial locations in :math:`f_{1}`, and
 *
 *
 *        Defined in src/operator/correlation.cc:L198
 * \param symbol_name name of the resulting symbol
 * \param data1 Input data1 to the correlation.
 * \param data2 Input data2 to the correlation.
 * \param kernel_size kernel size for Correlation must be an odd number
 * \param max_displacement Max displacement of Correlation
 * \param stride1 stride1 quantize data1 globally
 * \param stride2 stride2 quantize data2 within the neighborhood centered around data1
 * \param pad_size pad for Correlation
 * \param is_multiply operation type is either multiplication or subduction
 * \return new symbol
 */
inline Symbol Correlation(const std::string& symbol_name,
                          Symbol data1,
                          Symbol data2,
                          uint32_t kernel_size = 1,
                          uint32_t max_displacement = 1,
                          uint32_t stride1 = 1,
                          uint32_t stride2 = 1,
                          uint32_t pad_size = 0,
                          bool is_multiply = true) {
  return Operator("Correlation")
           .SetParam("kernel_size", kernel_size)
           .SetParam("max_displacement", max_displacement)
           .SetParam("stride1", stride1)
           .SetParam("stride2", stride2)
           .SetParam("pad_size", pad_size)
           .SetParam("is_multiply", is_multiply)
           .SetInput("data1", data1)
           .SetInput("data2", data2)
           .CreateSymbol(symbol_name);
}

/*! \brief Specify the dimension along which to compute L2 norm.
 */
enum class L2NormalizationMode {
  kChannel = 0,
  kInstance = 1,
  kSpatial = 2
};

/*!
 * \brief Normalize the input array using the L2 norm.
 *
 *        For 1-D NDArray, it computes::
 *
 *        out = data / sqrt(sum(data ** 2) + eps)
 *
 *        For N-D NDArray, if the input array has shape (N, N, ..., N),
 *
 *        with ``mode`` = ``instance``, it normalizes each instance in the
 *        array by its L2 norm.::
 *
 *        for i in 0...N
 *        out[i,:,:,...,:] = data[i,:,:,...,:] / sqrt(sum(data[i,:,:,...,:] ** 2) + eps)
 *
 *        with ``mode`` = ``channel``, it normalizes each channel in the array by its L2
 *
 *        for i in 0...N
 *        out[:,i,:,...,:] = data[:,i,:,...,:] / sqrt(sum(data[:,i,:,...,:] ** 2) + eps)
 *
 *        with ``mode`` = ``spatial``, it normalizes the cross channel norm for each
 *        in the array by its L2 norm.::
 *
 *        for dim in 2...N
 *        for i in 0...N
 *        out[.....,i,...] = take(out, indices=i, axis=dim) / sqrt(sum(take(out,
 *        -dim-
 *
 *        Example::
 *
 *        x = [[[1,2],
 *        [3,4]],
 *        [[2,2],
 *        [5,6]]]
 *
 *        L2Normalization(x, mode='instance')
 *        =[[[ 0.18257418  0.36514837]
 *        [ 0.54772252  0.73029673]]
 *        [[ 0.24077171  0.24077171]
 *        [ 0.60192931  0.72231513]]]
 *
 *        L2Normalization(x, mode='channel')
 *        =[[[ 0.31622776  0.44721359]
 *        [ 0.94868326  0.89442718]]
 *        [[ 0.37139067  0.31622776]
 *        [ 0.92847669  0.94868326]]]
 *
 *        L2Normalization(x, mode='spatial')
 *        =[[[ 0.44721359  0.89442718]
 *        [ 0.60000002  0.80000001]]
 *        [[ 0.70710677  0.70710677]
 *        [ 0.6401844   0.76822126]]]
 *
 *
 *
 *        Defined in src/operator/l2_normalization.cc:L196
 * \param symbol_name name of the resulting symbol
 * \param data Input array to normalize.
 * \param eps A small constant for numerical stability.
 * \param mode Specify the dimension along which to compute L2 norm.
 * \return new symbol
 */
inline Symbol L2Normalization(const std::string& symbol_name,
                              Symbol data,
                              mx_float eps = 1.00000001e-10,
                              L2NormalizationMode mode = L2NormalizationMode::kInstance) {
  static const char *L2NormalizationModeValues[] = {
    "channel",
    "instance",
    "spatial"
  };
  return Operator("L2Normalization")
           .SetParam("eps", eps)
           .SetParam("mode", L2NormalizationModeValues[int(mode)])
           .SetInput("data", data)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Sets all elements outside the sequence to a constant value.
 *
 *        This function takes an n-dimensional input array of the form
 *        [max_sequence_length, batch_size, other_feature_dims] and returns an array of
 *
 *        Parameter `sequence_length` is used to handle variable-length sequences.
 *        should be an input array of positive ints of dimension [batch_size].
 *        To use this parameter, set `use_sequence_length` to `True`,
 *        otherwise each example in the batch is assumed to have the max sequence length
 *        this operator works as the `identity` operator.
 *
 *        Example::
 *
 *        x = [[[  1.,   2.,   3.],
 *        [  4.,   5.,   6.]],
 *
 *        [[  7.,   8.,   9.],
 *        [ 10.,  11.,  12.]],
 *
 *        [[ 13.,  14.,   15.],
 *        [ 16.,  17.,   18.]]]
 *
 *        // Batch 1
 *        B1 = [[  1.,   2.,   3.],
 *        [  7.,   8.,   9.],
 *        [ 13.,  14.,  15.]]
 *
 *        // Batch 2
 *        B2 = [[  4.,   5.,   6.],
 *        [ 10.,  11.,  12.],
 *        [ 16.,  17.,  18.]]
 *
 *        // works as identity operator when sequence_length parameter is not used
 *        SequenceMask(x) = [[[  1.,   2.,   3.],
 *        [  4.,   5.,   6.]],
 *
 *        [[  7.,   8.,   9.],
 *        [ 10.,  11.,  12.]],
 *
 *        [[ 13.,  14.,   15.],
 *        [ 16.,  17.,   18.]]]
 *
 *        // sequence_length [1,1] means 1 of each batch will be kept
 *        // and other rows are masked with default mask value = 0
 *        SequenceMask(x, sequence_length=[1,1], use_sequence_length=True) =
 *        [[[  1.,   2.,   3.],
 *        [  4.,   5.,   6.]],
 *
 *        [[  0.,   0.,   0.],
 *        [  0.,   0.,   0.]],
 *
 *        [[  0.,   0.,   0.],
 *        [  0.,   0.,   0.]]]
 *
 *        // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept
 *        // and other rows are masked with value = 1
 *        SequenceMask(x, sequence_length=[2,3], use_sequence_length=True, value=1) =
 *        [[[  1.,   2.,   3.],
 *        [  4.,   5.,   6.]],
 *
 *        [[  7.,   8.,   9.],
 *        [  10.,  11.,  12.]],
 *
 *        [[   1.,   1.,   1.],
 *        [  16.,  17.,  18.]]]
 *
 *
 *
 *        Defined in src/operator/sequence_mask.cc:L186
 * \param symbol_name name of the resulting symbol
 * \param data n-dimensional input array of the form [max_sequence_length, batch_size,
 * \param sequence_length vector of sequence lengths of the form [batch_size]
 * \param use_sequence_length If set to true, this layer takes in an extra input
 * \param value The value to be used as a mask.
 * \param axis The sequence axis. Only values of 0 and 1 are currently supported.
 * \return new symbol
 */
inline Symbol SequenceMask(const std::string& symbol_name,
                           Symbol data,
                           Symbol sequence_length,
                           bool use_sequence_length = false,
                           mx_float value = 0,
                           int axis = 0) {
  return Operator("SequenceMask")
           .SetParam("use_sequence_length", use_sequence_length)
           .SetParam("value", value)
           .SetParam("axis", axis)
           .SetInput("data", data)
           .SetInput("sequence_length", sequence_length)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param src Source input to the function.
 * \return new symbol
 */
inline Symbol _set_value(const std::string& symbol_name,
                         mx_float src) {
  return Operator("_set_value")
           .SetParam("src", src)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief
 * \param symbol_name name of the resulting symbol
 * \param lhs Left operand to the function.
 * \param rhs Right operand to the function.
 * \return new symbol
 */
inline Symbol _onehot_encode(const std::string& symbol_name,
                             Symbol lhs,
                             Symbol rhs) {
  return Operator("_onehot_encode")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Fill one element of each line(row for python, column for R/Julia) in lhs
 *        according to index indicated by rhs and values indicated by mhs. This function
 * \param symbol_name name of the resulting symbol
 * \param lhs Left operand to the function.
 * \param mhs Middle operand to the function.
 * \param rhs Right operand to the function.
 * \return new symbol
 */
inline Symbol fill_element_0index(const std::string& symbol_name,
                                  Symbol lhs,
                                  Symbol mhs,
                                  Symbol rhs) {
  return Operator("fill_element_0index")
           .SetInput("lhs", lhs)
           .SetInput("mhs", mhs)
           .SetInput("rhs", rhs)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Decode an image, clip to (x0, y0, x1, y1), subtract mean, and write to buffer
 * \param symbol_name name of the resulting symbol
 * \param mean image mean
 * \param index buffer position for output
 * \param x0 x0
 * \param y0 y0
 * \param x1 x1
 * \param y1 y1
 * \param c channel
 * \param size length of str_img
 * \return new symbol
 */
inline Symbol _imdecode(const std::string& symbol_name,
                        Symbol mean,
                        int index,
                        int x0,
                        int y0,
                        int x1,
                        int y1,
                        int c,
                        int size) {
  return Operator("_imdecode")
           .SetParam("index", index)
           .SetParam("x0", x0)
           .SetParam("y0", y0)
           .SetParam("x1", x1)
           .SetParam("y1", y1)
           .SetParam("c", c)
           .SetParam("size", size)
           .SetInput("mean", mean)
           .CreateSymbol(symbol_name);
}

/*!
 * \brief Returns result of first array elements raised to powers from second array,
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_power(x, y) = [[ 2.,  2.,  2.],
 *        [ 4.,  4.,  4.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L45
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_power(Symbol lhs,
                              Symbol rhs) {
  return Operator("broadcast_power")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise maximum of the input arrays with broadcasting.
 *
 *        This function compares two input arrays and returns a new array having the
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_maximum(x, y) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L80
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_maximum(Symbol lhs,
                                Symbol rhs) {
  return Operator("broadcast_maximum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise minimum of the input arrays with broadcasting.
 *
 *        This function compares two input arrays and returns a new array having the
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_maximum(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L115
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_minimum(Symbol lhs,
                                Symbol rhs) {
  return Operator("broadcast_minimum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the hypotenuse of a right angled triangle, given its "legs"
 *        with broadcasting.
 *
 *        It is equivalent to doing :math:`sqrt(x_1^2 + x_2^2)`.
 *
 *        Example::
 *
 *        x = [[ 3.,  3.,  3.]]
 *
 *        y = [[ 4.],
 *        [ 4.]]
 *
 *        broadcast_hypot(x, y) = [[ 5.,  5.,  5.],
 *        [ 5.,  5.,  5.]]
 *
 *        z = [[ 0.],
 *        [ 4.]]
 *
 *        broadcast_hypot(x, z) = [[ 3.,  3.,  3.],
 *        [ 5.,  5.,  5.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L156
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_hypot(Symbol lhs,
                              Symbol rhs) {
  return Operator("broadcast_hypot")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _power(Symbol lhs,
                     Symbol rhs) {
  return Operator("_power")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _maximum(Symbol lhs,
                       Symbol rhs) {
  return Operator("_maximum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _minimum(Symbol lhs,
                       Symbol rhs) {
  return Operator("_minimum")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Given the "legs" of a right triangle, return its hypotenuse.
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_op_extended.cc:L79
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _hypot(Symbol lhs,
                     Symbol rhs) {
  return Operator("_hypot")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Computes the square sum of array elements over a given axis
 *        for row-sparse matrix. This is a temporary solution for fusing ops square and
 *        sum together for row-sparse matrix to save memory for storing gradients.
 *        It will become deprecated once the functionality of fusing operators is finished
 *        in the future.
 *
 *        Example::
 *
 *        dns = mx.nd.array([[0, 0], [1, 2], [0, 0], [3, 4], [0, 0]])
 *        rsp = dns.tostype('row_sparse')
 *        sum = mx.nd._internal._square_sum(rsp, axis=1)
 *        sum = [0, 5, 0, 25, 0]
 *
 *
 *        Defined in src/operator/tensor/square_sum.cc:L63
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol _square_sum(Symbol data,
                          dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                          bool keepdims = false,
                          bool exclude = false) {
  return Operator("_square_sum")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Adds arguments element-wise.
 *
 *        The storage type of ``elemwise_add`` output depends on storage types of inputs
 *
 *        - elemwise_add(row_sparse, row_sparse) = row_sparse
 *        - elemwise_add(csr, csr) = csr
 *        - elemwise_add(default, csr) = default
 *        - elemwise_add(csr, default) = default
 *        - elemwise_add(default, rsp) = default
 *        - elemwise_add(rsp, default) = default
 *        - otherwise, ``elemwise_add`` generates output with default storage
 *
 *
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_add(Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _grad_add(Symbol lhs,
                        Symbol rhs) {
  return Operator("_grad_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Subtracts arguments element-wise.
 *
 *        The storage type of ``elemwise_sub`` output depends on storage types of inputs
 *
 *        - elemwise_sub(row_sparse, row_sparse) = row_sparse
 *        - elemwise_sub(csr, csr) = csr
 *        - elemwise_sub(default, csr) = default
 *        - elemwise_sub(csr, default) = default
 *        - elemwise_sub(default, rsp) = default
 *        - elemwise_sub(rsp, default) = default
 *        - otherwise, ``elemwise_sub`` generates output with default storage
 *
 *
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_sub(Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_sub")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Multiplies arguments element-wise.
 *
 *        The storage type of ``elemwise_mul`` output depends on storage types of inputs
 *
 *        - elemwise_mul(default, default) = default
 *        - elemwise_mul(row_sparse, row_sparse) = row_sparse
 *        - elemwise_mul(default, row_sparse) = row_sparse
 *        - elemwise_mul(row_sparse, default) = row_sparse
 *        - elemwise_mul(csr, csr) = csr
 *        - otherwise, ``elemwise_mul`` generates output with default storage
 *
 *
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_mul(Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_mul")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Divides arguments element-wise.
 *
 *        The storage type of ``elemwise_div`` output is always dense
 *
 *
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol elemwise_div(Symbol lhs,
                           Symbol rhs) {
  return Operator("elemwise_div")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _mod(Symbol lhs,
                   Symbol rhs) {
  return Operator("_mod")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Dot product of two arrays.
 *
 *        ``dot``'s behavior depends on the input array dimensions:
 *
 *        - 1-D arrays: inner product of vectors
 *        - 2-D arrays: matrix multiplication
 *        - N-D arrays: a sum product over the last axis of the first input and the first
 *        axis of the second input
 *
 *        For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape
 *        result array will have shape `(n,m,r,s)`. It is computed by::
 *
 *        dot(x,y)[i,j,a,b] = sum(x[i,j,:]*y[:,a,b])
 *
 *        Example::
 *
 *        x = reshape([0,1,2,3,4,5,6,7], shape=(2,2,2))
 *        y = reshape([7,6,5,4,3,2,1,0], shape=(2,2,2))
 *        dot(x,y)[0,0,1,1] = 0
 *        sum(x[0,0,:]*y[:,1,1]) = 0
 *
 *        The storage type of ``dot`` output depends on storage types of inputs,
 *        forward_stype option for output storage type. Implemented sparse operations
 *
 *        - dot(default, default, transpose_a=True/False, transpose_b=True/False) =
 *        - dot(csr, default, transpose_a=True) = default
 *        - dot(csr, default, transpose_a=True) = row_sparse
 *        - dot(csr, default) = default
 *        - dot(csr, row_sparse) = default
 *        - dot(default, csr) = csr (CPU only)
 *        - dot(default, csr, forward_stype='default') = default
 *        - dot(default, csr, transpose_b=True, forward_stype='default') = default
 *
 *        If the combination of input storage types and forward_stype does not match any
 *        above patterns, ``dot`` will fallback and generate output with default storage.
 *
 *        .. Note::
 *
 *        If the storage type of the lhs is "csr", the storage type of gradient w.r.t rhs
 *        "row_sparse". Only a subset of optimizers support sparse gradients, including
 *        and Adam. Note that by default lazy updates is turned on, which may perform
 *        from standard updates. For more details, please check the Optimization API at:
 *        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
 *
 *
 *
 *        Defined in src/operator/tensor/dot.cc:L77
 * \param lhs The first input
 * \param rhs The second input
 * \param transpose_a If true then transpose the first input before dot.
 * \param transpose_b If true then transpose the second input before dot.
 * \param forward_stype The desired storage type of the forward output given by user, if
 *        thecombination of input storage types and this hint does not matchany
 *        implemented ones, the dot operator will perform fallback operationand still
 * \return new symbol
 */
inline Symbol dot(Symbol lhs,
                  Symbol rhs,
                  bool transpose_a = false,
                  bool transpose_b = false,
                  DotForwardStype forward_stype = DotForwardStype::kNone) {
  static const char *DotForwardStypeValues[] = {
    "None",
    "csr",
    "default",
    "row_sparse"
  };
  return Operator("dot")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("forward_stype", DotForwardStypeValues[int(forward_stype)])
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Batchwise dot product.
 *
 *        ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and
 *        ``y`` are data in batch, namely 3D arrays in shape of `(batch_size, :, :)`.
 *
 *        For example, given ``x`` with shape `(batch_size, n, m)` and ``y`` with shape
 *        `(batch_size, m, k)`, the result array will have shape `(batch_size, n, k)`,
 *        which is computed by::
 *
 *        batch_dot(x,y)[i,:,:] = dot(x[i,:,:], y[i,:,:])
 *
 *
 *
 *        Defined in src/operator/tensor/dot.cc:L125
 * \param lhs The first input
 * \param rhs The second input
 * \param transpose_a If true then transpose the first input before dot.
 * \param transpose_b If true then transpose the second input before dot.
 * \param forward_stype The desired storage type of the forward output given by user, if
 *        thecombination of input storage types and this hint does not matchany
 *        implemented ones, the dot operator will perform fallback operationand still
 * \return new symbol
 */
inline Symbol batch_dot(Symbol lhs,
                        Symbol rhs,
                        bool transpose_a = false,
                        bool transpose_b = false,
                        Batch_dotForwardStype forward_stype = Batch_dotForwardStype::kNone) {
  static const char *Batch_dotForwardStypeValues[] = {
    "None",
    "csr",
    "default",
    "row_sparse"
  };
  return Operator("batch_dot")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("forward_stype", Batch_dotForwardStypeValues[int(forward_stype)])
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief fill target with zeros without default dtype
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _zeros_without_dtype(Shape shape = Shape(),
                                   const std::string& ctx = "",
                                   int dtype = -1) {
  return Operator("_zeros_without_dtype")
           .SetParam("shape", shape)
           .SetParam("dtype", dtype)
           .CreateSymbol();
}

/*!
 * \brief fill target with zeros
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _zeros(Shape shape = {},
                     const std::string& ctx = "",
                     _zerosDtype dtype = _zerosDtype::kFloat32) {
  static const char *_zerosDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_zeros")
           .SetParam("shape", shape)
           .SetParam("dtype", _zerosDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Return a 2-D array with ones on the diagonal and zeros elsewhere.
 * \param N Number of rows in the output.
 * \param M Number of columns in the output. If 0, defaults to N
 * \param k Index of the diagonal. 0 (the default) refers to the main diagonal.A positive
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _eye(int64_t N,
                   int64_t M = 0,
                   int64_t k = 0,
                   const std::string& ctx = "",
                   _eyeDtype dtype = _eyeDtype::kFloat32) {
  static const char *_eyeDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_eye")
           .SetParam("N", N)
           .SetParam("M", M)
           .SetParam("k", k)
           .SetParam("dtype", _eyeDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief fill target with ones
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _ones(Shape shape = {},
                    const std::string& ctx = "",
                    _onesDtype dtype = _onesDtype::kFloat32) {
  static const char *_onesDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_ones")
           .SetParam("shape", shape)
           .SetParam("dtype", _onesDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief fill target with a scalar value
 * \param value Value with which to fill newly created tensor
 * \param shape The shape of the output
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _full(double value,
                    Shape shape = Shape(),
                    const std::string& ctx = "",
                    _fullDtype dtype = _fullDtype::kFloat32) {
  static const char *_fullDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_full")
           .SetParam("value", value)
           .SetParam("shape", shape)
           .SetParam("dtype", _fullDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Return evenly spaced values within a given interval. Similar to Numpy
 * \param start Start of interval. The interval includes this value. The default start
 * \param stop End of interval. The interval does not include this value, except in some
 *        cases where step is not an integer and floating point round-off affects the
 * \param step Spacing between values.
 * \param repeat The repeating time of all elements. E.g repeat=3, the element a will be
 * \param infer_range When set to True, infer the stop position from the start, step,
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _arange(double start,
                      dmlc::optional<double> stop = dmlc::optional<double>(),
                      double step = 1,
                      int repeat = 1,
                      bool infer_range = false,
                      const std::string& ctx = "",
                      _arangeDtype dtype = _arangeDtype::kFloat32) {
  static const char *_arangeDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_arange")
           .SetParam("start", start)
           .SetParam("stop", stop)
           .SetParam("step", step)
           .SetParam("repeat", repeat)
           .SetParam("infer_range", infer_range)
           .SetParam("dtype", _arangeDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Return an array with evenly spaced values. If axis is not given, the output will
 *        have the same shape as the input array. Otherwise, the output will be a 1-D
 *        the specified axis in input shape.
 *
 *        Examples::
 *
 *        x = [[0.14883883 0.7772398  0.94865847 0.7225052 ]
 *        [0.23729339 0.6112595  0.66538996 0.5132841 ]
 *        [0.30822644 0.9912457  0.15502319 0.7043658 ]]
 *        <NDArray 3x4 @cpu(0)>
 *
 *        out = mx.nd.contrib.arange_like(x, start=0)
 *
 *        [[ 0.  1.  2.  3.]
 *        [ 4.  5.  6.  7.]
 *        [ 8.  9. 10. 11.]]
 *        <NDArray 3x4 @cpu(0)>
 *
 *        out = mx.nd.contrib.arange_like(x, start=0, axis=-1)
 *
 *        [0. 1. 2. 3.]
 *        <NDArray 4 @cpu(0)>
 *
 * \param data The input
 * \return new symbol
 */
inline Symbol _contrib_arange_like(Symbol data) {
  return Operator("_contrib_arange_like")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Return evenly spaced numbers over a specified interval. Similar to Numpy
 * \param start Start of interval. The interval includes this value. The default start
 * \param stop End of interval. The interval does not include this value, except in some
 *        cases where step is not an integer and floating point round-off affects the
 * \param step Spacing between values.
 * \param repeat The repeating time of all elements. E.g repeat=3, the element a will be
 * \param infer_range When set to True, infer the stop position from the start, step,
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n).Only used for
 * \param dtype Target data type.
 * \return new symbol
 */
inline Symbol _linspace(double start,
                        dmlc::optional<double> stop = dmlc::optional<double>(),
                        double step = 1,
                        int repeat = 1,
                        bool infer_range = false,
                        const std::string& ctx = "",
                        _linspaceDtype dtype = _linspaceDtype::kFloat32) {
  static const char *_linspaceDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_linspace")
           .SetParam("start", start)
           .SetParam("stop", stop)
           .SetParam("step", step)
           .SetParam("repeat", repeat)
           .SetParam("infer_range", infer_range)
           .SetParam("dtype", _linspaceDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Return an array of zeros with the same shape, type and storage type
 *        as the input array.
 *
 *        The storage type of ``zeros_like`` output depends on the storage type of the
 *
 *        - zeros_like(row_sparse) = row_sparse
 *        - zeros_like(csr) = csr
 *        - zeros_like(default) = default
 *
 *        Examples::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        zeros_like(x) = [[ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *
 * \param data The input
 * \return new symbol
 */
inline Symbol zeros_like(Symbol data) {
  return Operator("zeros_like")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Return an array of ones with the same shape and type
 *        as the input array.
 *
 *        Examples::
 *
 *        x = [[ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *        ones_like(x) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *
 * \param data The input
 * \return new symbol
 */
inline Symbol ones_like(Symbol data) {
  return Operator("ones_like")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Adds all input arguments element-wise.
 *
 *        .. math::
 *        add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n
 *
 *        ``add_n`` is potentially more efficient than calling ``add`` by `n` times.
 *
 *        The storage type of ``add_n`` output depends on storage types of inputs
 *
 *        - add_n(row_sparse, row_sparse, ..) = row_sparse
 *        - add_n(default, csr, default) = default
 *        - add_n(any input combinations longer than 4 (>4) with at least one default
 *        - otherwise, ``add_n`` falls all inputs back to default storage and generates
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_sum.cc:L155
 * \param args Positional input arguments
 * \return new symbol
 */
inline Symbol add_n(const std::vector<Symbol>& args) {
  return Operator("add_n")
(args)
           .CreateSymbol();
}

/*!
 * \brief Performs general matrix multiplication and accumulation.
 *        Input are tensors *A*, *B*, *C*, each of dimension *n >= 2* and having the same
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, the BLAS3 function *gemm* is performed:
 *
 *        *out* = *alpha* \* *op*\ (*A*) \* *op*\ (*B*) + *beta* \* *C*
 *
 *        Here, *alpha* and *beta* are scalar parameters, and *op()* is either the
 *        matrix transposition (depending on *transpose_a*, *transpose_b*).
 *
 *        If *n>2*, *gemm* is performed separately for a batch of matrices. The column
 *        are given by the last dimensions of the tensors, the row indices by the axis
 *        parameter. By default, the trailing two dimensions will be used for matrix
 *
 *        For a non-default axis parameter, the operation performed is equivalent to a
 *        calls. For example let *A*, *B*, *C* be 5 dimensional tensors. Then gemm(*A*,
 *        to the following without the overhead of the additional swapaxis operations::
 *
 *        A1 = swapaxes(A, dim1=1, dim2=3)
 *        B1 = swapaxes(B, dim1=1, dim2=3)
 *        C = swapaxes(C, dim1=1, dim2=3)
 *        C = gemm(A1, B1, C)
 *        C = swapaxis(C, dim1=1, dim2=3)
 *
 *        When the input data is of type float32 and the environment variables
 *        and MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION are set to 1, this operator will
 *        pseudo-float16 precision (float32 math with float16 I/O) precision in order to
 *        Tensor Cores on suitable NVIDIA GPUs. This can sometimes give significant
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix multiply-add
 *        A = [[1.0, 1.0], [1.0, 1.0]]
 *        B = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
 *        C = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
 *        gemm(A, B, C, transpose_b=True, alpha=2.0, beta=10.0)
 *        = [[14.0, 14.0, 14.0], [14.0, 14.0, 14.0]]
 *
 *        // Batch matrix multiply-add
 *        A = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        B = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        C = [[[10.0]], [[0.01]]]
 *        gemm(A, B, C, transpose_b=True, alpha=2.0 , beta=10.0)
 *        = [[[104.0]], [[0.14]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L89
 * \param A Tensor of input matrices
 * \param B Tensor of input matrices
 * \param C Tensor of input matrices
 * \param transpose_a Multiply with transposed of first input (A).
 * \param transpose_b Multiply with transposed of second input (B).
 * \param alpha Scalar factor multiplied with A*B.
 * \param beta Scalar factor multiplied with C.
 * \param axis Axis corresponding to the matrix rows.
 * \return new symbol
 */
inline Symbol _linalg_gemm(Symbol A,
                           Symbol B,
                           Symbol C,
                           bool transpose_a = false,
                           bool transpose_b = false,
                           double alpha = 1,
                           double beta = 1,
                           int axis = -2) {
  return Operator("_linalg_gemm")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetParam("axis", axis)
           .SetInput("A", A)
           .SetInput("B", B)
           .SetInput("C", C)
           .CreateSymbol();
}

/*!
 * \brief Performs general matrix multiplication.
 *        Input are tensors *A*, *B*, each of dimension *n >= 2* and having the same shape
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, the BLAS3 function *gemm* is performed:
 *
 *        *out* = *alpha* \* *op*\ (*A*) \* *op*\ (*B*)
 *
 *        Here *alpha* is a scalar parameter and *op()* is either the identity or the
 *        transposition (depending on *transpose_a*, *transpose_b*).
 *
 *        If *n>2*, *gemm* is performed separately for a batch of matrices. The column
 *        are given by the last dimensions of the tensors, the row indices by the axis
 *        parameter. By default, the trailing two dimensions will be used for matrix
 *
 *        For a non-default axis parameter, the operation performed is equivalent to a
 *        calls. For example let *A*, *B* be 5 dimensional tensors. Then gemm(*A*, *B*,
 *        the following without the overhead of the additional swapaxis operations::
 *
 *        A1 = swapaxes(A, dim1=1, dim2=3)
 *        B1 = swapaxes(B, dim1=1, dim2=3)
 *        C = gemm2(A1, B1)
 *        C = swapaxis(C, dim1=1, dim2=3)
 *
 *        When the input data is of type float32 and the environment variables
 *        and MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION are set to 1, this operator will
 *        pseudo-float16 precision (float32 math with float16 I/O) precision in order to
 *        Tensor Cores on suitable NVIDIA GPUs. This can sometimes give significant
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix multiply
 *        A = [[1.0, 1.0], [1.0, 1.0]]
 *        B = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
 *        gemm2(A, B, transpose_b=True, alpha=2.0)
 *        = [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]]
 *
 *        // Batch matrix multiply
 *        A = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        B = [[[1.0, 1.0]], [[0.1, 0.1]]]
 *        gemm2(A, B, transpose_b=True, alpha=2.0)
 *        = [[[4.0]], [[0.04 ]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L163
 * \param A Tensor of input matrices
 * \param B Tensor of input matrices
 * \param transpose_a Multiply with transposed of first input (A).
 * \param transpose_b Multiply with transposed of second input (B).
 * \param alpha Scalar factor multiplied with A*B.
 * \param axis Axis corresponding to the matrix row indices.
 * \return new symbol
 */
inline Symbol _linalg_gemm2(Symbol A,
                            Symbol B,
                            bool transpose_a = false,
                            bool transpose_b = false,
                            double alpha = 1,
                            int axis = -2) {
  return Operator("_linalg_gemm2")
           .SetParam("transpose_a", transpose_a)
           .SetParam("transpose_b", transpose_b)
           .SetParam("alpha", alpha)
           .SetParam("axis", axis)
           .SetInput("A", A)
           .SetInput("B", B)
           .CreateSymbol();
}

/*!
 * \brief Performs Cholesky factorization of a symmetric positive-definite matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, the Cholesky factor *B* of the symmetric, positive definite matrix
 *        computed. *B* is triangular (entries of upper or lower triangle are all zero),
 *        positive diagonal entries, and:
 *
 *        *A* = *B* \* *B*\ :sup:`T`  if *lower* = *true*
 *        *A* = *B*\ :sup:`T` \* *B*  if *lower* = *false*
 *
 *        If *n>2*, *potrf* is performed separately on the trailing two dimensions for
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix factorization
 *        A = [[4.0, 1.0], [1.0, 4.25]]
 *        potrf(A) = [[2.0, 0], [0.5, 2.0]]
 *
 *        // Batch matrix factorization
 *        A = [[[4.0, 1.0], [1.0, 4.25]], [[16.0, 4.0], [4.0, 17.0]]]
 *        potrf(A) = [[[2.0, 0], [0.5, 2.0]], [[4.0, 0], [1.0, 4.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L214
 * \param A Tensor of input matrices to be decomposed
 * \return new symbol
 */
inline Symbol _linalg_potrf(Symbol A) {
  return Operator("_linalg_potrf")
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Performs matrix inversion from a Cholesky factorization.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* is a triangular matrix (entries of upper or lower triangle are
 *        with positive diagonal. We compute:
 *
 *        *out* = *A*\ :sup:`-T` \* *A*\ :sup:`-1` if *lower* = *true*
 *        *out* = *A*\ :sup:`-1` \* *A*\ :sup:`-T` if *lower* = *false*
 *
 *        In other words, if *A* is the Cholesky factor of a symmetric positive definite
 *        *B* (obtained by *potrf*), then
 *
 *        *out* = *B*\ :sup:`-1`
 *
 *        If *n>2*, *potri* is performed separately on the trailing two dimensions for
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        .. note:: Use this operator only if you are certain you need the inverse of
 *        cannot use the Cholesky factor *A* (*potrf*), together with backsubstitution
 *        (*trsm*). The latter is numerically much safer, and also cheaper.
 *
 *        Examples::
 *
 *        // Single matrix inverse
 *        A = [[2.0, 0], [0.5, 2.0]]
 *        potri(A) = [[0.26563, -0.0625], [-0.0625, 0.25]]
 *
 *        // Batch matrix inverse
 *        A = [[[2.0, 0], [0.5, 2.0]], [[4.0, 0], [1.0, 4.0]]]
 *        potri(A) = [[[0.26563, -0.0625], [-0.0625, 0.25]],
 *        [[0.06641, -0.01562], [-0.01562, 0,0625]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L275
 * \param A Tensor of lower triangular matrices
 * \return new symbol
 */
inline Symbol _linalg_potri(Symbol A) {
  return Operator("_linalg_potri")
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Performs multiplication with a lower triangular matrix.
 *        Input are tensors *A*, *B*, each of dimension *n >= 2* and having the same shape
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, *A* must be triangular. The operator performs the BLAS3 function
 *        *trmm*:
 *
 *        *out* = *alpha* \* *op*\ (*A*) \* *B*
 *
 *        if *rightside=False*, or
 *
 *        *out* = *alpha* \* *B* \* *op*\ (*A*)
 *
 *        if *rightside=True*. Here, *alpha* is a scalar parameter, and *op()* is either
 *        identity or the matrix transposition (depending on *transpose*).
 *
 *        If *n>2*, *trmm* is performed separately on the trailing two dimensions for all
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single triangular matrix multiply
 *        A = [[1.0, 0], [1.0, 1.0]]
 *        B = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
 *        trmm(A, B, alpha=2.0) = [[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]]
 *
 *        // Batch triangular matrix multiply
 *        A = [[[1.0, 0], [1.0, 1.0]], [[1.0, 0], [1.0, 1.0]]]
 *        B = [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], [[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]]
 *        trmm(A, B, alpha=2.0) = [[[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]],
 *        [[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L333
 * \param A Tensor of lower triangular matrices
 * \param B Tensor of matrices
 * \param transpose Use transposed of the triangular matrix
 * \param rightside Multiply triangular matrix from the right to non-triangular one.
 * \param lower True if the triangular matrix is lower triangular, false if it is upper
 * \param alpha Scalar factor to be applied to the result.
 * \return new symbol
 */
inline Symbol _linalg_trmm(Symbol A,
                           Symbol B,
                           bool transpose = false,
                           bool rightside = false,
                           bool lower = true,
                           double alpha = 1) {
  return Operator("_linalg_trmm")
           .SetParam("transpose", transpose)
           .SetParam("rightside", rightside)
           .SetParam("lower", lower)
           .SetParam("alpha", alpha)
           .SetInput("A", A)
           .SetInput("B", B)
           .CreateSymbol();
}

/*!
 * \brief Solves matrix equation involving a lower triangular matrix.
 *        Input are tensors *A*, *B*, each of dimension *n >= 2* and having the same shape
 *        on the leading *n-2* dimensions.
 *
 *        If *n=2*, *A* must be triangular. The operator performs the BLAS3 function
 *        *trsm*, solving for *out* in:
 *
 *        *op*\ (*A*) \* *out* = *alpha* \* *B*
 *
 *        if *rightside=False*, or
 *
 *        *out* \* *op*\ (*A*) = *alpha* \* *B*
 *
 *        if *rightside=True*. Here, *alpha* is a scalar parameter, and *op()* is either
 *        identity or the matrix transposition (depending on *transpose*).
 *
 *        If *n>2*, *trsm* is performed separately on the trailing two dimensions for all
 *        (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix solve
 *        A = [[1.0, 0], [1.0, 1.0]]
 *        B = [[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]]
 *        trsm(A, B, alpha=0.5) = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
 *
 *        // Batch matrix solve
 *        A = [[[1.0, 0], [1.0, 1.0]], [[1.0, 0], [1.0, 1.0]]]
 *        B = [[[2.0, 2.0, 2.0], [4.0, 4.0, 4.0]],
 *        [[4.0, 4.0, 4.0], [8.0, 8.0, 8.0]]]
 *        trsm(A, B, alpha=0.5) = [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
 *        [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L396
 * \param A Tensor of lower triangular matrices
 * \param B Tensor of matrices
 * \param transpose Use transposed of the triangular matrix
 * \param rightside Multiply triangular matrix from the right to non-triangular one.
 * \param lower True if the triangular matrix is lower triangular, false if it is upper
 * \param alpha Scalar factor to be applied to the result.
 * \return new symbol
 */
inline Symbol _linalg_trsm(Symbol A,
                           Symbol B,
                           bool transpose = false,
                           bool rightside = false,
                           bool lower = true,
                           double alpha = 1) {
  return Operator("_linalg_trsm")
           .SetParam("transpose", transpose)
           .SetParam("rightside", rightside)
           .SetParam("lower", lower)
           .SetParam("alpha", alpha)
           .SetInput("A", A)
           .SetInput("B", B)
           .CreateSymbol();
}

/*!
 * \brief Computes the sum of the logarithms of the diagonal elements of a square matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* must be square with positive diagonal entries. We sum the natural
 *        logarithms of the diagonal elements, the result has shape (1,).
 *
 *        If *n>2*, *sumlogdiag* is performed separately on the trailing two dimensions
 *        inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix reduction
 *        A = [[1.0, 1.0], [1.0, 7.0]]
 *        sumlogdiag(A) = [1.9459]
 *
 *        // Batch matrix reduction
 *        A = [[[1.0, 1.0], [1.0, 7.0]], [[3.0, 0], [0, 17.0]]]
 *        sumlogdiag(A) = [1.9459, 3.9318]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L445
 * \param A Tensor of square matrices
 * \return new symbol
 */
inline Symbol _linalg_sumlogdiag(Symbol A) {
  return Operator("_linalg_sumlogdiag")
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Extracts the diagonal entries of a square matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, then *A* represents a single square matrix which diagonal elements
 *
 *        If *n>2*, then *A* represents a batch of square matrices on the trailing two
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix diagonal extraction
 *        A = [[1.0, 2.0],
 *        [3.0, 4.0]]
 *
 *        extractdiag(A) = [1.0, 4.0]
 *
 *        extractdiag(A, 1) = [2.0]
 *
 *        // Batch matrix diagonal extraction
 *        A = [[[1.0, 2.0],
 *        [3.0, 4.0]],
 *        [[5.0, 6.0],
 *        [7.0, 8.0]]]
 *
 *        extractdiag(A) = [[1.0, 4.0],
 *        [5.0, 8.0]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L495
 * \param A Tensor of square matrices
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \return new symbol
 */
inline Symbol _linalg_extractdiag(Symbol A,
                                  int offset = 0) {
  return Operator("_linalg_extractdiag")
           .SetParam("offset", offset)
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Constructs a square matrix with the input as diagonal.
 *        Input is a tensor *A* of dimension *n >= 1*.
 *
 *        If *n=1*, then *A* represents the diagonal entries of a single square matrix.
 *        If *n>1*, then *A* represents a batch of diagonals of square matrices. The
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single diagonal matrix construction
 *        A = [1.0, 2.0]
 *
 *        makediag(A)    = [[1.0, 0.0],
 *        [0.0, 2.0]]
 *
 *        makediag(A, 1) = [[0.0, 1.0, 0.0],
 *        [0.0, 0.0, 2.0],
 *        [0.0, 0.0, 0.0]]
 *
 *        // Batch diagonal matrix construction
 *        A = [[1.0, 2.0],
 *        [3.0, 4.0]]
 *
 *        makediag(A) = [[[1.0, 0.0],
 *        [0.0, 2.0]],
 *        [[3.0, 0.0],
 *        [0.0, 4.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L547
 * \param A Tensor of diagonal entries
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \return new symbol
 */
inline Symbol _linalg_makediag(Symbol A,
                               int offset = 0) {
  return Operator("_linalg_makediag")
           .SetParam("offset", offset)
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Extracts a triangular sub-matrix from a square matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, then *A* represents a single square matrix from which a triangular
 *
 *        If *n>2*, then *A* represents a batch of square matrices on the trailing two
 *        dimensions. The extracted triangular sub-matrices are returned as an
 *
 *        The *offset* and *lower* parameters determine the triangle to be extracted:
 *
 *        - When *offset = 0* either the lower or upper triangle with respect to the main
 *        - When *offset = k > 0* the upper triangle with respect to the k-th diagonal
 *        - When *offset = k < 0* the lower triangle with respect to the k-th diagonal
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single triagonal extraction
 *        A = [[1.0, 2.0],
 *        [3.0, 4.0]]
 *
 *        extracttrian(A) = [1.0, 3.0, 4.0]
 *        extracttrian(A, lower=False) = [1.0, 2.0, 4.0]
 *        extracttrian(A, 1) = [2.0]
 *        extracttrian(A, -1) = [3.0]
 *
 *        // Batch triagonal extraction
 *        A = [[[1.0, 2.0],
 *        [3.0, 4.0]],
 *        [[5.0, 6.0],
 *        [7.0, 8.0]]]
 *
 *        extracttrian(A) = [[1.0, 3.0, 4.0],
 *        [5.0, 7.0, 8.0]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L605
 * \param A Tensor of square matrices
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \param lower Refer to the lower triangular matrix if lower=true, refer to the upper
 * \return new symbol
 */
inline Symbol _linalg_extracttrian(Symbol A,
                                   int offset = 0,
                                   bool lower = true) {
  return Operator("_linalg_extracttrian")
           .SetParam("offset", offset)
           .SetParam("lower", lower)
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Constructs a square matrix with the input representing a specific triangular
 *        This is basically the inverse of *linalg.extracttrian*. Input is a tensor *A*
 *
 *        If *n=1*, then *A* represents the entries of a triangular matrix which is lower
 *        triangular if *offset<0* or *offset=0*, *lower=true*. The resulting matrix is
 *        matrix with the entries outside the triangle set to zero and then adding
 *        diagonal with zero entries to the square matrix.
 *
 *        If *n>1*, then *A* represents a batch of triangular sub-matrices. The batch of
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single  matrix construction
 *        A = [1.0, 2.0, 3.0]
 *
 *        maketrian(A)              = [[1.0, 0.0],
 *        [2.0, 3.0]]
 *
 *        maketrian(A, lower=false) = [[1.0, 2.0],
 *        [0.0, 3.0]]
 *
 *        maketrian(A, offset=1)    = [[0.0, 1.0, 2.0],
 *        [0.0, 0.0, 3.0],
 *        [0.0, 0.0, 0.0]]
 *        maketrian(A, offset=-1)   = [[0.0, 0.0, 0.0],
 *        [1.0, 0.0, 0.0],
 *        [2.0, 3.0, 0.0]]
 *
 *        // Batch matrix construction
 *        A = [[1.0, 2.0, 3.0],
 *        [4.0, 5.0, 6.0]]
 *
 *        maketrian(A)           = [[[1.0, 0.0],
 *        [2.0, 3.0]],
 *        [[4.0, 0.0],
 *        [5.0, 6.0]]]
 *
 *        maketrian(A, offset=1) = [[[0.0, 1.0, 2.0],
 *        [0.0, 0.0, 3.0],
 *        [0.0, 0.0, 0.0]],
 *        [[0.0, 4.0, 5.0],
 *        [0.0, 0.0, 6.0],
 *        [0.0, 0.0, 0.0]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L673
 * \param A Tensor of triangular matrices stored as vectors
 * \param offset Offset of the diagonal versus the main diagonal. 0 corresponds to the
 *        main diagonal, a negative/positive value to diagonals below/above the main
 * \param lower Refer to the lower triangular matrix if lower=true, refer to the upper
 * \return new symbol
 */
inline Symbol _linalg_maketrian(Symbol A,
                                int offset = 0,
                                bool lower = true) {
  return Operator("_linalg_maketrian")
           .SetParam("offset", offset)
           .SetParam("lower", lower)
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Multiplication of matrix with its transpose.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, the operator performs the BLAS3 function *syrk*:
 *
 *        *out* = *alpha* \* *A* \* *A*\ :sup:`T`
 *
 *        if *transpose=False*, or
 *
 *        *out* = *alpha* \* *A*\ :sup:`T` \ \* *A*
 *
 *        if *transpose=True*.
 *
 *        If *n>2*, *syrk* is performed separately on the trailing two dimensions for all
 *        inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix multiply
 *        A = [[1., 2., 3.], [4., 5., 6.]]
 *        syrk(A, alpha=1., transpose=False)
 *        = [[14., 32.],
 *        [32., 77.]]
 *        syrk(A, alpha=1., transpose=True)
 *        = [[17., 22., 27.],
 *        [22., 29., 36.],
 *        [27., 36., 45.]]
 *
 *        // Batch matrix multiply
 *        A = [[[1., 1.]], [[0.1, 0.1]]]
 *        syrk(A, alpha=2., transpose=False) = [[[4.]], [[0.04]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L730
 * \param A Tensor of input matrices
 * \param transpose Use transpose of input matrix.
 * \param alpha Scalar factor to be applied to the result.
 * \return new symbol
 */
inline Symbol _linalg_syrk(Symbol A,
                           bool transpose = false,
                           double alpha = 1) {
  return Operator("_linalg_syrk")
           .SetParam("transpose", transpose)
           .SetParam("alpha", alpha)
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief LQ factorization for general matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, we compute the LQ factorization (LAPACK *gelqf*, followed by
 *        must have shape *(x, y)* with *x <= y*, and must have full rank *=x*. The LQ
 *        factorization consists of *L* with shape *(x, x)* and *Q* with shape *(x, y)*,
 *        that:
 *
 *        *A* = *L* \* *Q*
 *
 *        Here, *L* is lower triangular (upper triangle equal to zero) with nonzero
 *        and *Q* is row-orthonormal, meaning that
 *
 *        *Q* \* *Q*\ :sup:`T`
 *
 *        is equal to the identity matrix of shape *(x, x)*.
 *
 *        If *n>2*, *gelqf* is performed separately on the trailing two dimensions for all
 *        inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single LQ factorization
 *        A = [[1., 2., 3.], [4., 5., 6.]]
 *        Q, L = gelqf(A)
 *        Q = [[-0.26726124, -0.53452248, -0.80178373],
 *        [0.87287156, 0.21821789, -0.43643578]]
 *        L = [[-3.74165739, 0.],
 *        [-8.55235974, 1.96396101]]
 *
 *        // Batch LQ factorization
 *        A = [[[1., 2., 3.], [4., 5., 6.]],
 *        [[7., 8., 9.], [10., 11., 12.]]]
 *        Q, L = gelqf(A)
 *        Q = [[[-0.26726124, -0.53452248, -0.80178373],
 *        [0.87287156, 0.21821789, -0.43643578]],
 *        [[-0.50257071, -0.57436653, -0.64616234],
 *        [0.7620735, 0.05862104, -0.64483142]]]
 *        L = [[[-3.74165739, 0.],
 *        [-8.55235974, 1.96396101]],
 *        [[-13.92838828, 0.],
 *        [-19.09768702, 0.52758934]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L798
 * \param A Tensor of input matrices to be factorized
 * \return new symbol
 */
inline Symbol _linalg_gelqf(Symbol A) {
  return Operator("_linalg_gelqf")
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Eigendecomposition for symmetric matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* must be symmetric, of shape *(x, x)*. We compute the
 *        resulting in the orthonormal matrix *U* of eigenvectors, shape *(x, x)*, and the
 *        vector *L* of eigenvalues, shape *(x,)*, so that:
 *
 *        *U* \* *A* = *diag(L)* \* *U*
 *
 *        Here:
 *
 *        *U* \* *U*\ :sup:`T` = *U*\ :sup:`T` \* *U* = *I*
 *
 *        where *I* is the identity matrix. Also, *L(0) <= L(1) <= L(2) <= ...*
 *
 *        If *n>2*, *syevd* is performed separately on the trailing two dimensions of *A*
 *        mode). In this case, *U* has *n* dimensions like *A*, and *L* has *n-1*
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        .. note:: Derivatives for this operator are defined only if *A* is such that
 *        eigenvalues are distinct, and the eigengaps are not too small. If you need
 *        gradients, do not apply this operator to matrices with multiple eigenvalues.
 *
 *        Examples::
 *
 *        // Single symmetric eigendecomposition
 *        A = [[1., 2.], [2., 4.]]
 *        U, L = syevd(A)
 *        U = [[0.89442719, -0.4472136],
 *        [0.4472136, 0.89442719]]
 *        L = [0., 5.]
 *
 *        // Batch symmetric eigendecomposition
 *        A = [[[1., 2.], [2., 4.]],
 *        [[1., 2.], [2., 5.]]]
 *        U, L = syevd(A)
 *        U = [[[0.89442719, -0.4472136],
 *        [0.4472136, 0.89442719]],
 *        [[0.92387953, -0.38268343],
 *        [0.38268343, 0.92387953]]]
 *        L = [[0., 5.],
 *        [0.17157288, 5.82842712]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L867
 * \param A Tensor of input matrices to be factorized
 * \return new symbol
 */
inline Symbol _linalg_syevd(Symbol A) {
  return Operator("_linalg_syevd")
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief Compute the inverse of a matrix.
 *        Input is a tensor *A* of dimension *n >= 2*.
 *
 *        If *n=2*, *A* is a square matrix. We compute:
 *
 *        *out* = *A*\ :sup:`-1`
 *
 *        If *n>2*, *inverse* is performed separately on the trailing two dimensions
 *        for all inputs (batch mode).
 *
 *        .. note:: The operator supports float32 and float64 data types only.
 *
 *        Examples::
 *
 *        // Single matrix inversion
 *        A = [[1., 4.], [2., 3.]]
 *        inverse(A) = [[-0.6, 0.8], [0.4, -0.2]]
 *
 *        // Batch matrix inversion
 *        A = [[[1., 4.], [2., 3.]],
 *        [[1., 3.], [2., 4.]]]
 *        inverse(A) = [[[-0.6, 0.8], [0.4, -0.2]],
 *        [[-2., 1.5], [1., -0.5]]]
 *
 *
 *        Defined in src/operator/tensor/la_op.cc:L917
 * \param A Tensor of square matrix
 * \return new symbol
 */
inline Symbol _linalg_inverse(Symbol A) {
  return Operator("_linalg_inverse")
           .SetInput("A", A)
           .CreateSymbol();
}

/*!
 * \brief This operators implements the histogram function.
 *
 *        Example::
 *        x = [[0, 1], [2, 2], [3, 4]]
 *        histo, bin_edges = histogram(data=x, bin_bounds=[], bin_cnt=5, range=(0,5))
 *        histo = [1, 1, 2, 1, 1]
 *        bin_edges = [0., 1., 2., 3., 4.]
 *        histo, bin_edges = histogram(data=x, bin_bounds=[0., 2.1, 3.])
 *        histo = [4, 1]
 *
 *
 *
 *        Defined in src/operator/tensor/histogram.cc:L136
 * \param data Input ndarray
 * \param bins Input ndarray
 * \param bin_cnt Number of bins for uniform case
 * \param range The lower and upper range of the bins. if not provided, range is simply
 *        (a.min(), a.max()). values outside the range are ignored. the first element of
 *        the range must be less than or equal to the second. range affects the automatic
 *        bin computation as well. while bin width is computed to be optimal based on the
 *        actual data within range, the bin count will fill the entire range including
 * \return new symbol
 */
inline Symbol _histogram(Symbol data,
                         Symbol bins,
                         dmlc::optional<int> bin_cnt = dmlc::optional<int>(),
                         int64_t range = int64_t()) {
  return Operator("_histogram")
           .SetParam("bin_cnt", bin_cnt)
           .SetParam("range", range)
           .SetInput("data", data)
           .SetInput("bins", bins)
           .CreateSymbol();
}

/*!
 * \brief Returns indices of the maximum values along an axis.
 *
 *        In the case of multiple occurrences of maximum values, the indices
 *        are returned.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.],
 *        [ 3.,  4.,  5.]]
 *
 *        // argmax along axis 0
 *        argmax(x, axis=0) = [ 1.,  1.,  1.]
 *
 *        // argmax along axis 1
 *        argmax(x, axis=1) = [ 2.,  2.]
 *
 *        // argmax along axis 1 keeping same dims as an input array
 *        argmax(x, axis=1, keepdims=True) = [[ 2.],
 *        [ 2.]]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L52
 * \param data The input
 * \param axis The axis along which to perform the reduction. Negative values means
 *        indexing from right to left. ``Requires axis to be set as int, because global
 * \param keepdims If this is set to `True`, the reduced axis is left in the result as
 * \return new symbol
 */
inline Symbol argmax(Symbol data,
                     dmlc::optional<int> axis = dmlc::optional<int>(),
                     bool keepdims = false) {
  return Operator("argmax")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns indices of the minimum values along an axis.
 *
 *        In the case of multiple occurrences of minimum values, the indices
 *        are returned.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.],
 *        [ 3.,  4.,  5.]]
 *
 *        // argmin along axis 0
 *        argmin(x, axis=0) = [ 0.,  0.,  0.]
 *
 *        // argmin along axis 1
 *        argmin(x, axis=1) = [ 0.,  0.]
 *
 *        // argmin along axis 1 keeping same dims as an input array
 *        argmin(x, axis=1, keepdims=True) = [[ 0.],
 *        [ 0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L77
 * \param data The input
 * \param axis The axis along which to perform the reduction. Negative values means
 *        indexing from right to left. ``Requires axis to be set as int, because global
 * \param keepdims If this is set to `True`, the reduced axis is left in the result as
 * \return new symbol
 */
inline Symbol argmin(Symbol data,
                     dmlc::optional<int> axis = dmlc::optional<int>(),
                     bool keepdims = false) {
  return Operator("argmin")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns argmax indices of each channel from the input array.
 *
 *        The result will be an NDArray of shape (num_channel,).
 *
 *        In case of multiple occurrences of the maximum values, the indices
 *        are returned.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.],
 *        [ 3.,  4.,  5.]]
 *
 *        argmax_channel(x) = [ 2.,  2.]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L97
 * \param data The input array
 * \return new symbol
 */
inline Symbol argmax_channel(Symbol data) {
  return Operator("argmax_channel")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Picks elements from an input array according to the input indices along the
 *
 *        Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the
 *        an output array of shape ``(i0,)`` with::
 *
 *        output[i] = input[i, indices[i]]
 *
 *        By default, if any index mentioned is too large, it is replaced by the index
 *        the last element along an axis (the `clip` mode).
 *
 *        This function supports n-dimensional input and (n-1)-dimensional indices arrays.
 *
 *        Examples::
 *
 *        x = [[ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 5.,  6.]]
 *
 *        // picks elements with specified indices along axis 0
 *        pick(x, y=[0,1], 0) = [ 1.,  4.]
 *
 *        // picks elements with specified indices along axis 1
 *        pick(x, y=[0,1,0], 1) = [ 1.,  4.,  5.]
 *
 *        y = [[ 1.],
 *        [ 0.],
 *        [ 2.]]
 *
 *        // picks elements with specified indices along axis 1 using 'wrap' mode
 *        // to place indicies that would normally be out of bounds
 *        pick(x, y=[2,-1,-2], 1, mode='wrap') = [ 1.,  4.,  5.]
 *
 *        y = [[ 1.],
 *        [ 0.],
 *        [ 2.]]
 *
 *        // picks elements with specified indices along axis 1 and dims are maintained
 *        pick(x,y, 1, keepdims=True) = [[ 2.],
 *        [ 3.],
 *        [ 6.]]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L154
 * \param data The input array
 * \param index The index array
 * \param axis int or None. The axis to picking the elements. Negative values means
 *        indexing from right to left. If is `None`, the elements in the index w.r.t the
 * \param keepdims If true, the axis where we pick the elements is left in the result as
 * \param mode Specify how out-of-bound indices behave. Default is "clip". "clip" means
 *        clip to the range. So, if all indices mentioned are too large, they are
 *        replaced by the index that addresses the last element along an axis.  "wrap"
 * \return new symbol
 */
inline Symbol pick(Symbol data,
                   Symbol index,
                   dmlc::optional<int> axis = dmlc::optional<int>(-1),
                   bool keepdims = false,
                   PickMode mode = PickMode::kClip) {
  static const char *PickModeValues[] = {
    "clip",
    "wrap"
  };
  return Operator("pick")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("mode", PickModeValues[int(mode)])
           .SetInput("data", data)
           .SetInput("index", index)
           .CreateSymbol();
}

/*!
 * \brief Divides arguments element-wise.  If the left-hand-side input is 'row_sparse',
 *        only the values which exist in the left-hand sparse array are computed.  The
 *        are ignored.
 *
 *        The storage type of ``_scatter_elemwise_div`` output depends on storage types
 *
 *        - _scatter_elemwise_div(row_sparse, row_sparse) = row_sparse
 *        - _scatter_elemwise_div(row_sparse, dense) = row_sparse
 *        - _scatter_elemwise_div(row_sparse, csr) = row_sparse
 *        - otherwise, ``_scatter_elemwise_div`` behaves exactly like elemwise_div and
 *        with default storage
 *
 *
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _scatter_elemwise_div(Symbol lhs,
                                    Symbol rhs) {
  return Operator("_scatter_elemwise_div")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Adds a scalar to a tensor element-wise.  If the left-hand-side input is
 *        'row_sparse' or 'csr', then only the values which exist in the left-hand sparse
 *        The 'missing' values are ignored.
 *
 *        The storage type of ``_scatter_plus_scalar`` output depends on storage types of
 *
 *        - _scatter_plus_scalar(row_sparse, scalar) = row_sparse
 *        - _scatter_plus_scalar(csr, scalar) = csr
 *        - otherwise, ``_scatter_plus_scalar`` behaves exactly like _plus_scalar and
 *        with default storage
 *
 *
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _scatter_plus_scalar(Symbol data,
                                   mx_float scalar) {
  return Operator("_scatter_plus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Subtracts a scalar to a tensor element-wise.  If the left-hand-side input is
 *        'row_sparse' or 'csr', then only the values which exist in the left-hand sparse
 *        The 'missing' values are ignored.
 *
 *        The storage type of ``_scatter_minus_scalar`` output depends on storage types
 *
 *        - _scatter_minus_scalar(row_sparse, scalar) = row_sparse
 *        - _scatter_minus_scalar(csr, scalar) = csr
 *        - otherwise, ``_scatter_minus_scalar`` behaves exactly like _minus_scalar and
 *        with default storage
 *
 *
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _scatter_minus_scalar(Symbol data,
                                    mx_float scalar) {
  return Operator("_scatter_minus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise sum of the input arrays with broadcasting.
 *
 *        `broadcast_plus` is an alias to the function `broadcast_add`.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_add(x, y) = [[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]
 *
 *        broadcast_plus(x, y) = [[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_add(csr, dense(1D)) = dense
 *        broadcast_add(dense(1D), csr) = dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_add(Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise difference of the input arrays with broadcasting.
 *
 *        `broadcast_minus` is an alias to the function `broadcast_sub`.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_sub(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *        broadcast_minus(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_sub/minus(csr, dense(1D)) = dense
 *        broadcast_sub/minus(dense(1D), csr) = dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_sub(Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_sub")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise product of the input arrays with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_mul(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_mul(csr, dense(1D)) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L146
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_mul(Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_mul")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise division of the input arrays with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 6.,  6.,  6.],
 *        [ 6.,  6.,  6.]]
 *
 *        y = [[ 2.],
 *        [ 3.]]
 *
 *        broadcast_div(x, y) = [[ 3.,  3.,  3.],
 *        [ 2.,  2.,  2.]]
 *
 *        Supported sparse operations:
 *
 *        broadcast_div(csr, dense(1D)) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L187
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_div(Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_div")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise modulo of the input arrays with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 8.,  8.,  8.],
 *        [ 8.,  8.,  8.]]
 *
 *        y = [[ 2.],
 *        [ 3.]]
 *
 *        broadcast_mod(x, y) = [[ 0.,  0.,  0.],
 *        [ 2.,  2.,  2.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L222
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_mod(Symbol lhs,
                            Symbol rhs) {
  return Operator("broadcast_mod")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Computes rectified linear activation.
 *
 *        .. math::
 *        max(features, 0)
 *
 *        The storage type of ``relu`` output depends upon the input storage type:
 *
 *        - relu(default) = default
 *        - relu(row_sparse) = row_sparse
 *        - relu(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L85
 * \param data The input array.
 * \return new symbol
 */
inline Symbol relu(Symbol data) {
  return Operator("relu")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes sigmoid of x element-wise.
 *
 *        .. math::
 *        y = 1 / (1 + exp(-x))
 *
 *        The storage type of ``sigmoid`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L119
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sigmoid(Symbol data) {
  return Operator("sigmoid")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes hard sigmoid of x element-wise.
 *
 *        .. math::
 *        y = max(0, min(1, alpha * x + beta))
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L161
 * \param data The input array.
 * \param alpha Slope of hard sigmoid
 * \param beta Bias of hard sigmoid.
 * \return new symbol
 */
inline Symbol hard_sigmoid(Symbol data,
                           mx_float alpha = 0.200000003,
                           mx_float beta = 0.5) {
  return Operator("hard_sigmoid")
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes softsign of x element-wise.
 *
 *        .. math::
 *        y = x / (1 + abs(x))
 *
 *        The storage type of ``softsign`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L191
 * \param data The input array.
 * \return new symbol
 */
inline Symbol softsign(Symbol data) {
  return Operator("softsign")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns a copy of the input.
 *
 *        From:src/operator/tensor/elemwise_unary_op_basic.cc:246
 * \param data The input array.
 * \return new symbol
 */
inline Symbol _copy(Symbol data) {
  return Operator("_copy")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Stops gradient computation.
 *
 *        Stops the accumulated gradient of the inputs from flowing through this operator
 *        in the backward direction. In other words, this operator prevents the
 *        of its inputs to be taken into account for computing gradients.
 *
 *        Example::
 *
 *        v1 = [1, 2]
 *        v2 = [0, 1]
 *        a = Variable('a')
 *        b = Variable('b')
 *        b_stop_grad = stop_gradient(3 * b)
 *        loss = MakeLoss(b_stop_grad + a)
 *
 *        executor = loss.simple_bind(ctx=cpu(), a=(1,2), b=(1,2))
 *        executor.forward(is_train=True, a=v1, b=v2)
 *        executor.outputs
 *        [ 1.  5.]
 *
 *        executor.backward()
 *        executor.grad_arrays
 *        [ 0.  0.]
 *        [ 1.  1.]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L327
 * \param data The input array.
 * \return new symbol
 */
inline Symbol BlockGrad(Symbol data) {
  return Operator("BlockGrad")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Make your own loss function in network construction.
 *
 *        This operator accepts a customized loss function symbol as a terminal loss and
 *        the symbol should be an operator with no backward dependency.
 *        The output of this function is the gradient of loss with respect to the input
 *
 *        For example, if you are a making a cross entropy loss function. Assume ``out``
 *        predicted output and ``label`` is the true label, then the cross entropy can be
 *
 *        cross_entropy = label * log(out) + (1 - label) * log(1 - out)
 *        loss = make_loss(cross_entropy)
 *
 *        We will need to use ``make_loss`` when we are creating our own loss function or
 *        combine multiple loss functions. Also we may want to stop some variables'
 *        from backpropagation. See more detail in ``BlockGrad`` or ``stop_gradient``.
 *
 *        The storage type of ``make_loss`` output depends upon the input storage type:
 *
 *        - make_loss(default) = default
 *        - make_loss(row_sparse) = row_sparse
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L360
 * \param data The input array.
 * \return new symbol
 */
inline Symbol make_loss(Symbol data) {
  return Operator("make_loss")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs First input.
 * \param rhs Second input.
 * \return new symbol
 */
inline Symbol _identity_with_attr_like_rhs(Symbol lhs,
                                           Symbol rhs) {
  return Operator("_identity_with_attr_like_rhs")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Reshape some or all dimensions of `lhs` to have the same shape as some or all
 *
 *        Returns a **view** of the `lhs` array with a new shape without altering any
 *
 *        Example::
 *
 *        x = [1, 2, 3, 4, 5, 6]
 *        y = [[0, -4], [3, 2], [2, 2]]
 *        reshape_like(x, y) = [[1, 2], [3, 4], [5, 6]]
 *
 *        More precise control over how dimensions are inherited is achieved by
 *        slices over the `lhs` and `rhs` array dimensions. Only the sliced `lhs`
 *        are reshaped to the `rhs` sliced dimensions, with the non-sliced `lhs`
 *
 *        Examples::
 *
 *        - lhs shape = (30,7), rhs shape = (15,2,4), lhs_begin=0, lhs_end=1,
 *        - lhs shape = (3, 5), rhs shape = (1,15,4), lhs_begin=0, lhs_end=2,
 *
 *        Negative indices are supported, and `None` can be used for either `lhs_end` or
 *
 *        Example::
 *
 *        - lhs shape = (30, 12), rhs shape = (4, 2, 2, 3), lhs_begin=-1, lhs_end=None,
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L513
 * \param lhs First input.
 * \param rhs Second input.
 * \return new symbol
 */
inline Symbol reshape_like(Symbol lhs,
                           Symbol rhs) {
  return Operator("reshape_like")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns a 1D int64 array containing the shape of data.
 *
 *        Example::
 *
 *        shape_array([[1,2,3,4], [5,6,7,8]]) = [2,4]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L572
 * \param data Input Array.
 * \param lhs_begin Defaults to 0. The beginning index along which the lhs dimensions are
 * \param lhs_end Defaults to None. The ending index along which the lhs dimensions are
 * \param rhs_begin Defaults to 0. The beginning index along which the rhs dimensions are
 * \param rhs_end Defaults to None. The ending index along which the rhs dimensions are
 * \return new symbol
 */
inline Symbol shape_array(Symbol data,
                          dmlc::optional<int> lhs_begin = dmlc::optional<int>(),
                          dmlc::optional<int> lhs_end = dmlc::optional<int>(),
                          dmlc::optional<int> rhs_begin = dmlc::optional<int>(),
                          dmlc::optional<int> rhs_end = dmlc::optional<int>()) {
  return Operator("shape_array")
           .SetParam("lhs_begin", lhs_begin)
           .SetParam("lhs_end", lhs_end)
           .SetParam("rhs_begin", rhs_begin)
           .SetParam("rhs_end", rhs_end)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns a 1D int64 array containing the size of data.
 *
 *        Example::
 *
 *        size_array([[1,2,3,4], [5,6,7,8]]) = [8]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L624
 * \param data Input Array.
 * \return new symbol
 */
inline Symbol size_array(Symbol data) {
  return Operator("size_array")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Casts all elements of the input to a new type.
 *
 *        .. note:: ``Cast`` is deprecated. Use ``cast`` instead.
 *
 *        Example::
 *
 *        cast([0.9, 1.3], dtype='int32') = [0, 1]
 *        cast([1e20, 11.1], dtype='float16') = [inf, 11.09375]
 *        cast([300, 11.1, 10.9, -1, -3], dtype='uint8') = [44, 11, 10, 255, 253]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L662
 * \param data The input.
 * \param dtype Output data type.
 * \return new symbol
 */
inline Symbol Cast(Symbol data,
                   CastDtype dtype) {
  static const char *CastDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("Cast")
           .SetParam("dtype", CastDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Numerical negative of the argument, element-wise.
 *
 *        The storage type of ``negative`` output depends upon the input storage type:
 *
 *        - negative(default) = default
 *        - negative(row_sparse) = row_sparse
 *        - negative(csr) = csr
 *
 *
 * \param data The input array.
 * \return new symbol
 */
inline Symbol negative(Symbol data) {
  return Operator("negative")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the reciprocal of the argument, element-wise.
 *
 *        Calculates 1/x.
 *
 *        Example::
 *
 *        reciprocal([-2, 1, 3, 1.6, 0.2]) = [-0.5, 1.0, 0.33333334, 0.625, 5.0]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L714
 * \param data The input array.
 * \return new symbol
 */
inline Symbol reciprocal(Symbol data) {
  return Operator("reciprocal")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise absolute value of the input.
 *
 *        Example::
 *
 *        abs([-2, 0, 3]) = [2, 0, 3]
 *
 *        The storage type of ``abs`` output depends upon the input storage type:
 *
 *        - abs(default) = default
 *        - abs(row_sparse) = row_sparse
 *        - abs(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L767
 * \param data The input array.
 * \return new symbol
 */
inline Symbol abs(Symbol data) {
  return Operator("abs")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise sign of the input.
 *
 *        Example::
 *
 *        sign([-2, 0, 3]) = [-1, 0, 1]
 *
 *        The storage type of ``sign`` output depends upon the input storage type:
 *
 *        - sign(default) = default
 *        - sign(row_sparse) = row_sparse
 *        - sign(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L805
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sign(Symbol data) {
  return Operator("sign")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise rounded value to the nearest integer of the input.
 *
 *        Example::
 *
 *        round([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  2., -2.,  2.,  2.]
 *
 *        The storage type of ``round`` output depends upon the input storage type:
 *
 *        - round(default) = default
 *        - round(row_sparse) = row_sparse
 *        - round(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L824
 * \param data The input array.
 * \return new symbol
 */
inline Symbol round(Symbol data) {
  return Operator("round")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise rounded value to the nearest integer of the input.
 *
 *        .. note::
 *        - For input ``n.5`` ``rint`` returns ``n`` while ``round`` returns ``n+1``.
 *        - For input ``-n.5`` both ``rint`` and ``round`` returns ``-n-1``.
 *
 *        Example::
 *
 *        rint([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  1., -2.,  2.,  2.]
 *
 *        The storage type of ``rint`` output depends upon the input storage type:
 *
 *        - rint(default) = default
 *        - rint(row_sparse) = row_sparse
 *        - rint(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L845
 * \param data The input array.
 * \return new symbol
 */
inline Symbol rint(Symbol data) {
  return Operator("rint")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise ceiling of the input.
 *
 *        The ceil of the scalar x is the smallest integer i, such that i >= x.
 *
 *        Example::
 *
 *        ceil([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  2.,  2.,  3.]
 *
 *        The storage type of ``ceil`` output depends upon the input storage type:
 *
 *        - ceil(default) = default
 *        - ceil(row_sparse) = row_sparse
 *        - ceil(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L864
 * \param data The input array.
 * \return new symbol
 */
inline Symbol ceil(Symbol data) {
  return Operator("ceil")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise floor of the input.
 *
 *        The floor of the scalar x is the largest integer i, such that i <= x.
 *
 *        Example::
 *
 *        floor([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-3., -2.,  1.,  1.,  2.]
 *
 *        The storage type of ``floor`` output depends upon the input storage type:
 *
 *        - floor(default) = default
 *        - floor(row_sparse) = row_sparse
 *        - floor(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L883
 * \param data The input array.
 * \return new symbol
 */
inline Symbol floor(Symbol data) {
  return Operator("floor")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Return the element-wise truncated value of the input.
 *
 *        The truncated value of the scalar x is the nearest integer i which is closer to
 *        zero than x is. In short, the fractional part of the signed number x is
 *
 *        Example::
 *
 *        trunc([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  1.,  1.,  2.]
 *
 *        The storage type of ``trunc`` output depends upon the input storage type:
 *
 *        - trunc(default) = default
 *        - trunc(row_sparse) = row_sparse
 *        - trunc(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L903
 * \param data The input array.
 * \return new symbol
 */
inline Symbol trunc(Symbol data) {
  return Operator("trunc")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise rounded value to the nearest \
 *        integer towards zero of the input.
 *
 *        Example::
 *
 *        fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1.,  1., 2.]
 *
 *        The storage type of ``fix`` output depends upon the input storage type:
 *
 *        - fix(default) = default
 *        - fix(row_sparse) = row_sparse
 *        - fix(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L921
 * \param data The input array.
 * \return new symbol
 */
inline Symbol fix(Symbol data) {
  return Operator("fix")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise squared value of the input.
 *
 *        .. math::
 *        square(x) = x^2
 *
 *        Example::
 *
 *        square([2, 3, 4]) = [4, 9, 16]
 *
 *        The storage type of ``square`` output depends upon the input storage type:
 *
 *        - square(default) = default
 *        - square(row_sparse) = row_sparse
 *        - square(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L961
 * \param data The input array.
 * \return new symbol
 */
inline Symbol square(Symbol data) {
  return Operator("square")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise square-root value of the input.
 *
 *        .. math::
 *        \textrm{sqrt}(x) = \sqrt{x}
 *
 *        Example::
 *
 *        sqrt([4, 9, 16]) = [2, 3, 4]
 *
 *        The storage type of ``sqrt`` output depends upon the input storage type:
 *
 *        - sqrt(default) = default
 *        - sqrt(row_sparse) = row_sparse
 *        - sqrt(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L985
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sqrt(Symbol data) {
  return Operator("sqrt")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise inverse square-root value of the input.
 *
 *        .. math::
 *        rsqrt(x) = 1/\sqrt{x}
 *
 *        Example::
 *
 *        rsqrt([4,9,16]) = [0.5, 0.33333334, 0.25]
 *
 *        The storage type of ``rsqrt`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1005
 * \param data The input array.
 * \return new symbol
 */
inline Symbol rsqrt(Symbol data) {
  return Operator("rsqrt")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise cube-root value of the input.
 *
 *        .. math::
 *        cbrt(x) = \sqrt[3]{x}
 *
 *        Example::
 *
 *        cbrt([1, 8, -125]) = [1, 2, -5]
 *
 *        The storage type of ``cbrt`` output depends upon the input storage type:
 *
 *        - cbrt(default) = default
 *        - cbrt(row_sparse) = row_sparse
 *        - cbrt(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1028
 * \param data The input array.
 * \return new symbol
 */
inline Symbol cbrt(Symbol data) {
  return Operator("cbrt")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise gauss error function of the input.
 *
 *        Example::
 *
 *        erf([0, -1., 10.]) = [0., -0.8427, 1.]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1042
 * \param data The input array.
 * \return new symbol
 */
inline Symbol erf(Symbol data) {
  return Operator("erf")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise inverse gauss error function of the input.
 *
 *        Example::
 *
 *        erfinv([0, 0.5., -1.]) = [0., 0.4769, -inf]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1063
 * \param data The input array.
 * \return new symbol
 */
inline Symbol erfinv(Symbol data) {
  return Operator("erfinv")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise inverse cube-root value of the input.
 *
 *        .. math::
 *        rcbrt(x) = 1/\sqrt[3]{x}
 *
 *        Example::
 *
 *        rcbrt([1,8,-125]) = [1.0, 0.5, -0.2]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1082
 * \param data The input array.
 * \return new symbol
 */
inline Symbol rcbrt(Symbol data) {
  return Operator("rcbrt")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise exponential value of the input.
 *
 *        .. math::
 *        exp(x) = e^x \approx 2.718^x
 *
 *        Example::
 *
 *        exp([0, 1, 2]) = [1., 2.71828175, 7.38905621]
 *
 *        The storage type of ``exp`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1122
 * \param data The input array.
 * \return new symbol
 */
inline Symbol exp(Symbol data) {
  return Operator("exp")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise Natural logarithmic value of the input.
 *
 *        The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x``
 *
 *        The storage type of ``log`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1135
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log(Symbol data) {
  return Operator("log")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise Base-10 logarithmic value of the input.
 *
 *        ``10**log10(x) = x``
 *
 *        The storage type of ``log10`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1152
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log10(Symbol data) {
  return Operator("log10")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise Base-2 logarithmic value of the input.
 *
 *        ``2**log2(x) = x``
 *
 *        The storage type of ``log2`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1164
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log2(Symbol data) {
  return Operator("log2")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise ``log(1 + x)`` value of the input.
 *
 *        This function is more accurate than ``log(1 + x)``  for small ``x`` so that
 *        :math:`1+x\approx 1`
 *
 *        The storage type of ``log1p`` output depends upon the input storage type:
 *
 *        - log1p(default) = default
 *        - log1p(row_sparse) = row_sparse
 *        - log1p(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1265
 * \param data The input array.
 * \return new symbol
 */
inline Symbol log1p(Symbol data) {
  return Operator("log1p")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns ``exp(x) - 1`` computed element-wise on the input.
 *
 *        This function provides greater precision than ``exp(x) - 1`` for small values
 *
 *        The storage type of ``expm1`` output depends upon the input storage type:
 *
 *        - expm1(default) = default
 *        - expm1(row_sparse) = row_sparse
 *        - expm1(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1283
 * \param data The input array.
 * \return new symbol
 */
inline Symbol expm1(Symbol data) {
  return Operator("expm1")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the gamma function (extension of the factorial function \
 *        to the reals), computed element-wise on the input array.
 *
 *        The storage type of ``gamma`` output is always dense
 *
 *
 * \param data The input array.
 * \return new symbol
 */
inline Symbol gamma(Symbol data) {
  return Operator("gamma")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise log of the absolute value of the gamma function \
 *        of the input.
 *
 *        The storage type of ``gammaln`` output is always dense
 *
 *
 * \param data The input array.
 * \return new symbol
 */
inline Symbol gammaln(Symbol data) {
  return Operator("gammaln")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of logical NOT (!) function
 *
 *        Example:
 *        logical_not([-2., 0., 1.]) = [0., 1., 0.]
 *
 *
 * \param data The input array.
 * \return new symbol
 */
inline Symbol logical_not(Symbol data) {
  return Operator("logical_not")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Pick rows specified by user input index array from a row sparse matrix
 *        and save them in the output sparse matrix.
 *
 *        Example::
 *
 *        data = [[1, 2], [3, 4], [5, 6]]
 *        indices = [0, 1, 3]
 *        shape = (4, 2)
 *        rsp_in = row_sparse_array(data, indices)
 *        to_retain = [0, 3]
 *        rsp_out = retain(rsp_in, to_retain)
 *        rsp_out.data = [[1, 2], [5, 6]]
 *        rsp_out.indices = [0, 3]
 *
 *        The storage type of ``retain`` output depends on storage types of inputs
 *
 *        - retain(row_sparse, default) = row_sparse
 *        - otherwise, ``retain`` is not supported
 *
 *
 *
 *        Defined in src/operator/tensor/sparse_retain.cc:L53
 * \param data The input array for sparse_retain operator.
 * \param indices The index array of rows ids that will be retained.
 * \return new symbol
 */
inline Symbol _sparse_retain(Symbol data,
                             Symbol indices) {
  return Operator("_sparse_retain")
           .SetInput("data", data)
           .SetInput("indices", indices)
           .CreateSymbol();
}

/*!
 * \brief Cast function between low precision float/FP32 used by AMP.
 *
 *        It casts only between low precision float/FP32 and does not do anything for
 *
 *
 *        Defined in src/operator/tensor/amp_cast.cc:L37
 * \param data The input.
 * \param dtype Output data type.
 * \return new symbol
 */
inline Symbol amp_cast(Symbol data,
                       Amp_castDtype dtype) {
  static const char *Amp_castDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("amp_cast")
           .SetParam("dtype", Amp_castDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Cast function used by AMP, that casts its inputs to the common widest type.
 *
 *        It casts only between low precision float/FP32 and does not do anything for
 *
 *
 *
 *        Defined in src/operator/tensor/amp_cast.cc:L71
 * \param data Weights
 * \param num_outputs Number of input/output pairs to be casted to the widest type.
 * \return new symbol
 */
inline Symbol amp_multicast(const std::vector<Symbol>& data,
                            int num_outputs) {
  return Operator("amp_multicast")
           .SetParam("num_outputs", num_outputs)
(data)
           .CreateSymbol();
}

/*!
 * \brief Returns the top *k* elements in an input array along the given axis.
 *        The returned elements will be sorted.
 *
 *        Examples::
 *
 *        x = [[ 0.3,  0.2,  0.4],
 *        [ 0.1,  0.3,  0.2]]
 *
 *        // returns an index of the largest element on last axis
 *        topk(x) = [[ 2.],
 *        [ 1.]]
 *
 *        // returns the value of top-2 largest elements on last axis
 *        topk(x, ret_typ='value', k=2) = [[ 0.4,  0.3],
 *        [ 0.3,  0.2]]
 *
 *        // returns the value of top-2 smallest elements on last axis
 *        topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 ,  0.3],
 *        [ 0.1 ,  0.2]]
 *
 *        // returns the value of top-2 largest elements on axis 0
 *        topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3,  0.3,  0.4],
 *        [ 0.1,  0.2,  0.2]]
 *
 *        // flattens and then returns list of both values and indices
 *        topk(x, ret_typ='both', k=2) = [[[ 0.4,  0.3], [ 0.3,  0.2]] ,  [[ 2.,  0.], [
 *
 *
 *
 *        Defined in src/operator/tensor/ordering_op.cc:L64
 * \param data The input array
 * \param axis Axis along which to choose the top k indices. If not given, the flattened
 * \param k Number of top elements to select, should be always smaller than or equal to
 * \param ret_typ The return type.
 *        "value" means to return the top k values, "indices" means to return the indices
 *        of the top k values, "mask" means to return a mask array containing 0 and 1. 1
 *        means the top k values. "both" means to return a list of both values and
 * \param is_ascend Whether to choose k largest or k smallest elements. Top K largest
 * \param dtype DType of the output indices when ret_typ is "indices" or "both". An error
 * \return new symbol
 */
inline Symbol topk(Symbol data,
                   dmlc::optional<int> axis = dmlc::optional<int>(-1),
                   int k = 1,
                   TopkRetTyp ret_typ = TopkRetTyp::kIndices,
                   bool is_ascend = false,
                   TopkDtype dtype = TopkDtype::kFloat32) {
  static const char *TopkRetTypValues[] = {
    "both",
    "indices",
    "mask",
    "value"
  };
  static const char *TopkDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "uint8"
  };
  return Operator("topk")
           .SetParam("axis", axis)
           .SetParam("k", k)
           .SetParam("ret_typ", TopkRetTypValues[int(ret_typ)])
           .SetParam("is_ascend", is_ascend)
           .SetParam("dtype", TopkDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns a sorted copy of an input array along the given axis.
 *
 *        Examples::
 *
 *        x = [[ 1, 4],
 *        [ 3, 1]]
 *
 *        // sorts along the last axis
 *        sort(x) = [[ 1.,  4.],
 *        [ 1.,  3.]]
 *
 *        // flattens and then sorts
 *        sort(x, axis=None) = [ 1.,  1.,  3.,  4.]
 *
 *        // sorts along the first axis
 *        sort(x, axis=0) = [[ 1.,  1.],
 *        [ 3.,  4.]]
 *
 *        // in a descend order
 *        sort(x, is_ascend=0) = [[ 4.,  1.],
 *        [ 3.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/ordering_op.cc:L127
 * \param data The input array
 * \param axis Axis along which to choose sort the input tensor. If not given, the
 * \param is_ascend Whether to sort in ascending or descending order.
 * \return new symbol
 */
inline Symbol sort(Symbol data,
                   dmlc::optional<int> axis = dmlc::optional<int>(-1),
                   bool is_ascend = true) {
  return Operator("sort")
           .SetParam("axis", axis)
           .SetParam("is_ascend", is_ascend)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the indices that would sort an input array along the given axis.
 *
 *        This function performs sorting along the given axis and returns an array of
 *        as an input array that index data in sorted order.
 *
 *        Examples::
 *
 *        x = [[ 0.3,  0.2,  0.4],
 *        [ 0.1,  0.3,  0.2]]
 *
 *        // sort along axis -1
 *        argsort(x) = [[ 1.,  0.,  2.],
 *        [ 0.,  2.,  1.]]
 *
 *        // sort along axis 0
 *        argsort(x, axis=0) = [[ 1.,  0.,  1.]
 *        [ 0.,  1.,  0.]]
 *
 *        // flatten and then sort
 *        argsort(x, axis=None) = [ 3.,  1.,  5.,  0.,  4.,  2.]
 *
 *
 *        Defined in src/operator/tensor/ordering_op.cc:L177
 * \param data The input array
 * \param axis Axis along which to sort the input tensor. If not given, the flattened
 * \param is_ascend Whether to sort in ascending or descending order.
 * \param dtype DType of the output indices. It is only valid when ret_typ is "indices"
 *        or "both". An error will be raised if the selected data type cannot precisely
 * \return new symbol
 */
inline Symbol argsort(Symbol data,
                      dmlc::optional<int> axis = dmlc::optional<int>(-1),
                      bool is_ascend = true,
                      ArgsortDtype dtype = ArgsortDtype::kFloat32) {
  static const char *ArgsortDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "uint8"
  };
  return Operator("argsort")
           .SetParam("axis", axis)
           .SetParam("is_ascend", is_ascend)
           .SetParam("dtype", ArgsortDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _plus_scalar(Symbol data,
                           mx_float scalar) {
  return Operator("_plus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _minus_scalar(Symbol data,
                            mx_float scalar) {
  return Operator("_minus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rminus_scalar(Symbol data,
                             mx_float scalar) {
  return Operator("_rminus_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Multiply an array with a scalar.
 *
 *        ``_mul_scalar`` only operates on data array of input if input is sparse.
 *
 *        For example, if input of shape (100, 100) has only 2 non zero elements,
 *        i.e. input.data = [5, 6], scalar = nan,
 *        it will result output.data = [nan, nan] instead of 10000 nans.
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_scalar_op_basic.cc:L149
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _mul_scalar(Symbol data,
                          mx_float scalar) {
  return Operator("_mul_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Divide an array with a scalar.
 *
 *        ``_div_scalar`` only operates on data array of input if input is sparse.
 *
 *        For example, if input of shape (100, 100) has only 2 non zero elements,
 *        i.e. input.data = [5, 6], scalar = nan,
 *        it will result output.data = [nan, nan] instead of 10000 nans.
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_scalar_op_basic.cc:L171
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _div_scalar(Symbol data,
                          mx_float scalar) {
  return Operator("_div_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rdiv_scalar(Symbol data,
                           mx_float scalar) {
  return Operator("_rdiv_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _mod_scalar(Symbol data,
                          mx_float scalar) {
  return Operator("_mod_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rmod_scalar(Symbol data,
                           mx_float scalar) {
  return Operator("_rmod_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Maps integer indices to vector representations (embeddings).
 *
 *        This operator maps words to real-valued vectors in a high-dimensional space,
 *        called word embeddings. These embeddings can capture semantic and syntactic
 *        For example, it has been noted that in the learned embedding spaces, similar
 *        to be close to each other and dissimilar words far apart.
 *
 *        For an input array of shape (d1, ..., dK),
 *        the shape of an output array is (d1, ..., dK, output_dim).
 *        All the input values should be integers in the range [0, input_dim).
 *
 *        If the input_dim is ip0 and output_dim is op0, then shape of the embedding
 *        (ip0, op0).
 *
 *        By default, if any index mentioned is too large, it is replaced by the index
 *        the last vector in an embedding matrix.
 *
 *        Examples::
 *
 *        input_dim = 4
 *        output_dim = 5
 *
 *        // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
 *        y = [[  0.,   1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.,   9.],
 *        [ 10.,  11.,  12.,  13.,  14.],
 *        [ 15.,  16.,  17.,  18.,  19.]]
 *
 *        // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
 *        x = [[ 1.,  3.],
 *        [ 0.,  2.]]
 *
 *        // Mapped input x to its vector representation y.
 *        Embedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
 *        [ 15.,  16.,  17.,  18.,  19.]],
 *
 *        [[  0.,   1.,   2.,   3.,   4.],
 *        [ 10.,  11.,  12.,  13.,  14.]]]
 *
 *
 *        The storage type of weight can be either row_sparse or default.
 *
 *        .. Note::
 *
 *        If "sparse_grad" is set to True, the storage type of gradient w.r.t weights
 *        "row_sparse". Only a subset of optimizers support sparse gradients, including
 *        and Adam. Note that by default lazy updates is turned on, which may perform
 *        from standard updates. For more details, please check the Optimization API at:
 *        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L519
 * \param data The input array to the embedding operator.
 * \param weight The embedding weight matrix.
 * \param input_dim Vocabulary size of the input indices.
 * \param output_dim Dimension of the embedding vectors.
 * \param dtype Data type of weight.
 * \param sparse_grad Compute row sparse gradient in the backward calculation. If set to
 * \return new symbol
 */
inline Symbol Embedding(Symbol data,
                        Symbol weight,
                        int input_dim,
                        int output_dim,
                        EmbeddingDtype dtype = EmbeddingDtype::kFloat32,
                        bool sparse_grad = false) {
  static const char *EmbeddingDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("Embedding")
           .SetParam("input_dim", input_dim)
           .SetParam("output_dim", output_dim)
           .SetParam("dtype", EmbeddingDtypeValues[int(dtype)])
           .SetParam("sparse_grad", sparse_grad)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .CreateSymbol();
}

/*!
 * \brief Maps integer indices to vector representations (embeddings).
 *
 *        note:: ``contrib.SparseEmbedding`` is deprecated, use ``Embedding`` instead.
 *
 *        This operator maps words to real-valued vectors in a high-dimensional space,
 *        called word embeddings. These embeddings can capture semantic and syntactic
 *        For example, it has been noted that in the learned embedding spaces, similar
 *        to be close to each other and dissimilar words far apart.
 *
 *        For an input array of shape (d1, ..., dK),
 *        the shape of an output array is (d1, ..., dK, output_dim).
 *        All the input values should be integers in the range [0, input_dim).
 *
 *        If the input_dim is ip0 and output_dim is op0, then shape of the embedding
 *        (ip0, op0).
 *
 *        The storage type of the gradient will be `row_sparse`.
 *
 *        .. Note::
 *
 *        `SparseEmbedding` is designed for the use case where `input_dim` is very large
 *        The operator is available on both CPU and GPU.
 *        When `deterministic` is set to `True`, the accumulation of gradients follows a
 *        deterministic order if a feature appears multiple times in the input. However,
 *        accumulation is usually slower when the order is enforced on GPU.
 *        When the operator is used on the GPU, the recommended value for `deterministic`
 *
 *        Examples::
 *
 *        input_dim = 4
 *        output_dim = 5
 *
 *        // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
 *        y = [[  0.,   1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.,   9.],
 *        [ 10.,  11.,  12.,  13.,  14.],
 *        [ 15.,  16.,  17.,  18.,  19.]]
 *
 *        // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
 *        x = [[ 1.,  3.],
 *        [ 0.,  2.]]
 *
 *        // Mapped input x to its vector representation y.
 *        SparseEmbedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
 *        [ 15.,  16.,  17.,  18.,  19.]],
 *
 *        [[  0.,   1.,   2.,   3.,   4.],
 *        [ 10.,  11.,  12.,  13.,  14.]]]
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L595
 * \param data The input array to the embedding operator.
 * \param weight The embedding weight matrix.
 * \param input_dim Vocabulary size of the input indices.
 * \param output_dim Dimension of the embedding vectors.
 * \param dtype Data type of weight.
 * \param sparse_grad Compute row sparse gradient in the backward calculation. If set to
 * \return new symbol
 */
inline Symbol _contrib_SparseEmbedding(Symbol data,
                                       Symbol weight,
                                       int input_dim,
                                       int output_dim,
                                       _contrib_SparseEmbeddingDtype dtype = _contrib_SparseEmbeddingDtype::kFloat32,
                                       bool sparse_grad = false) {
  static const char *_contrib_SparseEmbeddingDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("_contrib_SparseEmbedding")
           .SetParam("input_dim", input_dim)
           .SetParam("output_dim", output_dim)
           .SetParam("dtype", _contrib_SparseEmbeddingDtypeValues[int(dtype)])
           .SetParam("sparse_grad", sparse_grad)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .CreateSymbol();
}

/*!
 * \brief Takes elements from an input array along the given axis.
 *
 *        This function slices the input array along a particular axis with the provided
 *
 *        Given data tensor of rank r >= 1, and indices tensor of rank q, gather entries
 *        dimension of data (by default outer-most one as axis=0) indexed by indices, and
 *        in an output tensor of rank q + (r - 1).
 *
 *        Examples::
 *
 *        x = [4.  5.  6.]
 *
 *        // Trivial case, take the second element along the first axis.
 *
 *        take(x, [1]) = [ 5. ]
 *
 *        // The other trivial case, axis=-1, take the third element along the first axis
 *
 *        take(x, [3], axis=-1, mode='clip') = [ 6. ]
 *
 *        x = [[ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 5.,  6.]]
 *
 *        // In this case we will get rows 0 and 1, then 1 and 2. Along axis 0
 *
 *        take(x, [[0,1],[1,2]]) = [[[ 1.,  2.],
 *        [ 3.,  4.]],
 *
 *        [[ 3.,  4.],
 *        [ 5.,  6.]]]
 *
 *        // In this case we will get rows 0 and 1, then 1 and 2 (calculated by wrapping
 *        // Along axis 1
 *
 *        take(x, [[0, 3], [-1, -2]], axis=1, mode='wrap') = [[[ 1.  2.]
 *        [ 2.  1.]]
 *
 *        [[ 3.  4.]
 *        [ 4.  3.]]
 *
 *        [[ 5.  6.]
 *        [ 6.  5.]]]
 *
 *        The storage type of ``take`` output depends upon the input storage type:
 *
 *        - take(default, default) = default
 *        - take(csr, default, axis=0) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L695
 * \param a The input array.
 * \param indices The indices of the values to be extracted.
 * \param axis The axis of input array to be taken.For input tensor of rank r, it could
 * \param mode Specify how out-of-bound indices bahave. Default is "clip". "clip" means
 *        clip to the range. So, if all indices mentioned are too large, they are
 *        replaced by the index that addresses the last element along an axis.  "wrap"
 * \return new symbol
 */
inline Symbol take(Symbol a,
                   Symbol indices,
                   int axis = 0,
                   TakeMode mode = TakeMode::kClip) {
  static const char *TakeModeValues[] = {
    "clip",
    "raise",
    "wrap"
  };
  return Operator("take")
           .SetParam("axis", axis)
           .SetParam("mode", TakeModeValues[int(mode)])
           .SetInput("a", a)
           .SetInput("indices", indices)
           .CreateSymbol();
}

/*!
 * \brief Takes elements from a data batch.
 *
 *        .. note::
 *        `batch_take` is deprecated. Use `pick` instead.
 *
 *        Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the
 *        an output array of shape ``(i0,)`` with::
 *
 *        output[i] = input[i, indices[i]]
 *
 *        Examples::
 *
 *        x = [[ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 5.,  6.]]
 *
 *        // takes elements with specified indices
 *        batch_take(x, [0,1,0]) = [ 1.  4.  5.]
 *
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L753
 * \param a The input array
 * \param indices The index array
 * \return new symbol
 */
inline Symbol batch_take(Symbol a,
                         Symbol indices) {
  return Operator("batch_take")
           .SetInput("a", a)
           .SetInput("indices", indices)
           .CreateSymbol();
}

/*!
 * \brief Returns a one-hot array.
 *
 *        The locations represented by `indices` take value `on_value`, while all
 *        other locations take value `off_value`.
 *
 *        `one_hot` operation with `indices` of shape ``(i0, i1)`` and `depth`  of ``d``
 *        in an output array of shape ``(i0, i1, d)`` with::
 *
 *        output[i,j,:] = off_value
 *        output[i,j,indices[i,j]] = on_value
 *
 *        Examples::
 *
 *        one_hot([1,0,2,0], 3) = [[ 0.  1.  0.]
 *        [ 1.  0.  0.]
 *        [ 0.  0.  1.]
 *        [ 1.  0.  0.]]
 *
 *        one_hot([1,0,2,0], 3, on_value=8, off_value=1,
 *        dtype='int32') = [[1 8 1]
 *        [8 1 1]
 *        [1 1 8]
 *        [8 1 1]]
 *
 *        one_hot([[1,0],[1,0],[2,0]], 3) = [[[ 0.  1.  0.]
 *        [ 1.  0.  0.]]
 *
 *        [[ 0.  1.  0.]
 *        [ 1.  0.  0.]]
 *
 *        [[ 0.  0.  1.]
 *        [ 1.  0.  0.]]]
 *
 *
 *        Defined in src/operator/tensor/indexing_op.cc:L799
 * \param indices array of locations where to set on_value
 * \param depth Depth of the one hot dimension.
 * \param on_value The value assigned to the locations represented by indices.
 * \param off_value The value assigned to the locations not represented by indices.
 * \param dtype DType of the output
 * \return new symbol
 */
inline Symbol one_hot(Symbol indices,
                      int depth,
                      double on_value = 1,
                      double off_value = 0,
                      One_hotDtype dtype = One_hotDtype::kFloat32) {
  static const char *One_hotDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8",
    "uint8"
  };
  return Operator("one_hot")
           .SetParam("depth", depth)
           .SetParam("on_value", on_value)
           .SetParam("off_value", off_value)
           .SetParam("dtype", One_hotDtypeValues[int(dtype)])
           .SetInput("indices", indices)
           .CreateSymbol();
}

/*!
 * \brief Gather elements or slices from `data` and store to a tensor whose
 *        shape is defined by `indices`.
 *
 *        Given `data` with shape `(X_0, X_1, ..., X_{N-1})` and indices with shape
 *        `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(Y_0, ..., Y_{K-1}, X_M,
 *        where `M <= N`. If `M == N`, output shape will simply be `(Y_0, ..., Y_{K-1})`.
 *
 *        The elements in output is defined as follows::
 *
 *        output[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}] = data[indices[0, y_0, ...,
 *        ...,
 *        indices[M-1, y_0, ..., y_{K-1}],
 *        x_M, ..., x_{N-1}]
 *
 *        Examples::
 *
 *        data = [[0, 1], [2, 3]]
 *        indices = [[1, 1, 0], [0, 1, 0]]
 *        gather_nd(data, indices) = [2, 3, 0]
 *
 *        data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 *        indices = [[0, 1], [1, 0]]
 *        gather_nd(data, indices) = [[3, 4], [5, 6]]
 *
 *
 * \param data data
 * \param indices indices
 * \return new symbol
 */
inline Symbol gather_nd(Symbol data,
                        Symbol indices) {
  return Operator("gather_nd")
           .SetInput("data", data)
           .SetInput("indices", indices)
           .CreateSymbol();
}

/*!
 * \brief Scatters data into a new tensor according to indices.
 *
 *        Given `data` with shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})` and indices
 *        `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(X_0, X_1, ..., X_{N-1})`,
 *        where `M <= N`. If `M == N`, data shape should simply be `(Y_0, ..., Y_{K-1})`.
 *
 *        The elements in output is defined as follows::
 *
 *        output[indices[0, y_0, ..., y_{K-1}],
 *        ...,
 *        indices[M-1, y_0, ..., y_{K-1}],
 *        x_M, ..., x_{N-1}] = data[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}]
 *
 *        all other entries in output are 0.
 *
 *        .. warning::
 *
 *        If the indices have duplicates, the result will be non-deterministic and
 *        the gradient of `scatter_nd` will not be correct!!
 *
 *
 *        Examples::
 *
 *        data = [2, 3, 0]
 *        indices = [[1, 1, 0], [0, 1, 0]]
 *        shape = (2, 2)
 *        scatter_nd(data, indices, shape) = [[0, 0], [2, 3]]
 *
 *        data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 *        indices = [[0, 1], [1, 1]]
 *        shape = (2, 2, 2, 2)
 *        scatter_nd(data, indices, shape) = [[[[0, 0],
 *        [0, 0]],
 *
 *        [[1, 2],
 *        [3, 4]]],
 *
 *        [[[0, 0],
 *        [0, 0]],
 *
 *        [[5, 6],
 *        [7, 8]]]]
 *
 *
 * \param data data
 * \param indices indices
 * \param shape Shape of output.
 * \return new symbol
 */
inline Symbol scatter_nd(Symbol data,
                         Symbol indices,
                         Shape shape) {
  return Operator("scatter_nd")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .SetInput("indices", indices)
           .CreateSymbol();
}

/*!
 * \brief This operator has the same functionality as scatter_nd
 *        except that it does not reset the elements not indexed by the input
 *        index `NDArray` in the input data `NDArray`. output should be explicitly
 *        given and be the same as lhs.
 *
 *        .. note:: This operator is for internal use only.
 *
 *        Examples::
 *
 *        data = [2, 3, 0]
 *        indices = [[1, 1, 0], [0, 1, 0]]
 *        out = [[1, 1], [1, 1]]
 *        _scatter_set_nd(lhs=out, rhs=data, indices=indices, out=out)
 *        out = [[0, 1], [2, 3]]
 *
 *
 * \param lhs source input
 * \param rhs value to assign
 * \param indices indices
 * \param shape Shape of output.
 * \return new symbol
 */
inline Symbol _scatter_set_nd(Symbol lhs,
                              Symbol rhs,
                              Symbol indices,
                              Shape shape) {
  return Operator("_scatter_set_nd")
           .SetParam("shape", shape)
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .SetInput("indices", indices)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **equal to** (==) comparison operation with
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_equal(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L46
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_equal(Symbol lhs,
                              Symbol rhs) {
  return Operator("broadcast_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **not equal to** (!=) comparison operation
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_not_equal(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L64
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_not_equal(Symbol lhs,
                                  Symbol rhs) {
  return Operator("broadcast_not_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **greater than** (>) comparison operation
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_greater(x, y) = [[ 1.,  1.,  1.],
 *        [ 0.,  0.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L82
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_greater(Symbol lhs,
                                Symbol rhs) {
  return Operator("broadcast_greater")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **greater than or equal to** (>=) comparison
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_greater_equal(x, y) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L100
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_greater_equal(Symbol lhs,
                                      Symbol rhs) {
  return Operator("broadcast_greater_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **lesser than** (<) comparison operation
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_lesser(x, y) = [[ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L118
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_lesser(Symbol lhs,
                               Symbol rhs) {
  return Operator("broadcast_lesser")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **lesser than or equal to** (<=) comparison
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_lesser_equal(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L136
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_lesser_equal(Symbol lhs,
                                     Symbol rhs) {
  return Operator("broadcast_lesser_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **logical and** with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  1.]]
 *
 *        y = [[ 0.],
 *        [ 1.]]
 *
 *        broadcast_logical_and(x, y) = [[ 0.,  0.,  0.],
 *        [ 1.,  1.,  1.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L154
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_logical_and(Symbol lhs,
                                    Symbol rhs) {
  return Operator("broadcast_logical_and")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **logical or** with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  0.],
 *        [ 1.,  1.,  0.]]
 *
 *        y = [[ 1.],
 *        [ 0.]]
 *
 *        broadcast_logical_or(x, y) = [[ 1.,  1.,  1.],
 *        [ 1.,  1.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L172
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_logical_or(Symbol lhs,
                                   Symbol rhs) {
  return Operator("broadcast_logical_or")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Returns the result of element-wise **logical xor** with broadcasting.
 *
 *        Example::
 *
 *        x = [[ 1.,  1.,  0.],
 *        [ 1.,  1.,  0.]]
 *
 *        y = [[ 1.],
 *        [ 0.]]
 *
 *        broadcast_logical_xor(x, y) = [[ 0.,  0.,  1.],
 *        [ 1.,  1.,  0.]]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L190
 * \param lhs First input to the function
 * \param rhs Second input to the function
 * \return new symbol
 */
inline Symbol broadcast_logical_xor(Symbol lhs,
                                    Symbol rhs) {
  return Operator("broadcast_logical_xor")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Extracts a diagonal or constructs a diagonal array.
 *
 *        ``diag``'s behavior depends on the input array dimensions:
 *
 *        - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other
 *        - N-D arrays: extracts the diagonals of the sub-arrays with axes specified by
 *        The output shape would be decided by removing the axes numbered ``axis1`` and
 *        input shape and appending to the result a new axis with the size of the
 *
 *        For example, when the input shape is `(2, 3, 4, 5)`, ``axis1`` and ``axis2``
 *        respectively and ``k`` is 0, the resulting shape would be `(3, 5, 2)`.
 *
 *        Examples::
 *
 *        x = [[1, 2, 3],
 *        [4, 5, 6]]
 *
 *        diag(x) = [1, 5]
 *
 *        diag(x, k=1) = [2, 6]
 *
 *        diag(x, k=-1) = [4]
 *
 *        x = [1, 2, 3]
 *
 *        diag(x) = [[1, 0, 0],
 *        [0, 2, 0],
 *        [0, 0, 3]]
 *
 *        diag(x, k=1) = [[0, 1, 0],
 *        [0, 0, 2],
 *        [0, 0, 0]]
 *
 *        diag(x, k=-1) = [[0, 0, 0],
 *        [1, 0, 0],
 *        [0, 2, 0]]
 *
 *        x = [[[1, 2],
 *        [3, 4]],
 *
 *        [[5, 6],
 *        [7, 8]]]
 *
 *        diag(x) = [[1, 7],
 *        [2, 8]]
 *
 *        diag(x, k=1) = [[3],
 *        [4]]
 *
 *        diag(x, axis1=-2, axis2=-1) = [[1, 4],
 *        [5, 8]]
 *
 *
 *
 *        Defined in src/operator/tensor/diag_op.cc:L87
 * \param data Input ndarray
 * \param k Diagonal in question. The default is 0. Use k>0 for diagonals above the main
 *        diagonal, and k<0 for diagonals below the main diagonal. If input has shape (S0
 * \param axis1 The first axis of the sub-arrays of interest. Ignored when the input is a
 * \param axis2 The second axis of the sub-arrays of interest. Ignored when the input is
 * \return new symbol
 */
inline Symbol diag(Symbol data,
                   int k = 0,
                   int axis1 = 0,
                   int axis2 = 1) {
  return Operator("diag")
           .SetParam("k", k)
           .SetParam("axis1", axis1)
           .SetParam("axis2", axis2)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the sum of array elements over given axes.
 *
 *        .. Note::
 *
 *        `sum` and `sum_axis` are equivalent.
 *        For ndarray of csr storage type summation along axis 0 and axis 1 is supported.
 *        Setting keepdims or exclude to True will cause a fallback to dense operator.
 *
 *        Example::
 *
 *        data = [[[1, 2], [2, 3], [1, 3]],
 *        [[1, 4], [4, 3], [5, 2]],
 *        [[7, 1], [7, 2], [7, 3]]]
 *
 *        sum(data, axis=1)
 *        [[  4.   8.]
 *        [ 10.   9.]
 *        [ 21.   6.]]
 *
 *        sum(data, axis=[1,2])
 *        [ 12.  19.  27.]
 *
 *        data = [[1, 2, 0],
 *        [3, 0, 1],
 *        [4, 1, 0]]
 *
 *        csr = cast_storage(data, 'csr')
 *
 *        sum(csr, axis=0)
 *        [ 8.  3.  1.]
 *
 *        sum(csr, axis=1)
 *        [ 3.  4.  5.]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L116
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol sum(Symbol data,
                  dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                  bool keepdims = false,
                  bool exclude = false) {
  return Operator("sum")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the mean of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L132
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol mean(Symbol data,
                   dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                   bool keepdims = false,
                   bool exclude = false) {
  return Operator("mean")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the product of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L147
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol prod(Symbol data,
                   dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                   bool keepdims = false,
                   bool exclude = false) {
  return Operator("prod")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the sum of array elements over given axes treating Not a Numbers
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L162
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol nansum(Symbol data,
                     dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                     bool keepdims = false,
                     bool exclude = false) {
  return Operator("nansum")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the product of array elements over given axes treating Not a Numbers
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L177
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol nanprod(Symbol data,
                      dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                      bool keepdims = false,
                      bool exclude = false) {
  return Operator("nanprod")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the max of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L191
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol max(Symbol data,
                  dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                  bool keepdims = false,
                  bool exclude = false) {
  return Operator("max")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the min of array elements over given axes.
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L205
 * \param data The input
 * \param axis The axis or axes along which to perform the reduction.
 *
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *
 *        If `axis` is int, a reduction is performed on a particular axis.
 *
 *        If `axis` is a tuple of ints, a reduction is performed on all the axes
 *        specified in the tuple.
 *
 *        If `exclude` is true, reduction will be performed on the axes that are
 *        NOT in axis instead.
 *
 *        Negative values means indexing from right to left.
 * \param keepdims If this is set to `True`, the reduced axes are left in the result as
 * \param exclude Whether to perform reduction on axis that are NOT in axis instead.
 * \return new symbol
 */
inline Symbol min(Symbol data,
                  dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                  bool keepdims = false,
                  bool exclude = false) {
  return Operator("min")
           .SetParam("axis", axis)
           .SetParam("keepdims", keepdims)
           .SetParam("exclude", exclude)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Broadcasts the input array over particular axes.
 *
 *        Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
 *        `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.
 *
 *        Example::
 *
 *        // given x of shape (1,2,1)
 *        x = [[[ 1.],
 *        [ 2.]]]
 *
 *        // broadcast x on on axis 2
 *        broadcast_axis(x, axis=2, size=3) = [[[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]]
 *        // broadcast x on on axes 0 and 2
 *        broadcast_axis(x, axis=(0,2), size=(2,3)) = [[[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]],
 *        [[ 1.,  1.,  1.],
 *        [ 2.,  2.,  2.]]]
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L238
 * \param data The input
 * \param axis The axes to perform the broadcasting.
 * \param size Target sizes of the broadcasting axes.
 * \return new symbol
 */
inline Symbol broadcast_axis(Symbol data,
                             Shape axis = {},
                             Shape size = {}) {
  return Operator("broadcast_axis")
           .SetParam("axis", axis)
           .SetParam("size", size)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Broadcasts the input array to a new shape.
 *
 *        Broadcasting is a mechanism that allows NDArrays to perform arithmetic
 *        with arrays of different shapes efficiently without creating multiple copies of
 *        Also see, `Broadcasting
 *        <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
 *
 *        Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
 *        `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.
 *
 *        For example::
 *
 *        broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1.,  2.,  3.],
 *        [ 1.,  2.,  3.]])
 *
 *        The dimension which you do not want to change can also be kept as `0` which
 *        So with `shape=(2,0)`, we will obtain the same result as in the above example.
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L262
 * \param data The input
 * \param shape The shape of the desired array. We can set the dim to zero if it's same
 *        as the original. E.g `A = broadcast_to(B, shape=(10, 0, 0))` has the same
 * \return new symbol
 */
inline Symbol broadcast_to(Symbol data,
                           Shape shape = {}) {
  return Operator("broadcast_to")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \return new symbol
 */
inline Symbol _broadcast_backward() {
  return Operator("_broadcast_backward")
           .CreateSymbol();
}

/*!
 * \brief Broadcasts lhs to have the same shape as rhs.
 *
 *        Broadcasting is a mechanism that allows NDArrays to perform arithmetic
 *        with arrays of different shapes efficiently without creating multiple copies of
 *        Also see, `Broadcasting
 *        <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
 *
 *        Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
 *        `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.
 *
 *        For example::
 *
 *        broadcast_like([[1,2,3]], [[5,6,7],[7,8,9]]) = [[ 1.,  2.,  3.],
 *        [ 1.,  2.,  3.]])
 *
 *        broadcast_like([9], [1,2,3,4,5], lhs_axes=(0,), rhs_axes=(-1,)) = [9,9,9,9,9]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L315
 * \param lhs First input.
 * \param rhs Second input.
 * \param lhs_axes Axes to perform broadcast on in the first input array
 * \param rhs_axes Axes to copy from the second input array
 * \return new symbol
 */
inline Symbol broadcast_like(Symbol lhs,
                             Symbol rhs,
                             dmlc::optional<Shape> lhs_axes = dmlc::optional<Shape>(),
                             dmlc::optional<Shape> rhs_axes = dmlc::optional<Shape>()) {
  return Operator("broadcast_like")
           .SetParam("lhs_axes", lhs_axes)
           .SetParam("rhs_axes", rhs_axes)
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Computes the norm on an NDArray.
 *
 *        This operator computes the norm on an NDArray with the specified axis, depending
 *        on the value of the ord parameter. By default, it computes the L2 norm on the
 *        array. Currently only ord=2 supports sparse ndarrays.
 *
 *        Examples::
 *
 *        x = [[[1, 2],
 *        [3, 4]],
 *        [[2, 2],
 *        [5, 6]]]
 *
 *        norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ]
 *        [5.3851647 6.3245554]]
 *
 *        norm(x, ord=1, axis=1) = [[4., 6.],
 *        [7., 8.]]
 *
 *        rsp = x.cast_storage('row_sparse')
 *
 *        norm(rsp) = [5.47722578]
 *
 *        csr = x.cast_storage('csr')
 *
 *        norm(csr) = [5.47722578]
 *
 *
 *
 *        Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L350
 * \param data The input
 * \param ord Order of the norm. Currently ord=1 and ord=2 is supported.
 * \param axis The axis or axes along which to perform the reduction.
 *        The default, `axis=()`, will compute over all elements into a
 *        scalar array with shape `(1,)`.
 *        If `axis` is int, a reduction is performed on a particular axis.
 *        If `axis` is a 2-tuple, it specifies the axes that hold 2-D matrices,
 *        and the matrix norms of these matrices are computed.
 * \param out_dtype The data type of the output.
 * \param keepdims If this is set to `True`, the reduced axis is left in the result as
 * \return new symbol
 */
inline Symbol norm(Symbol data,
                   int ord = 2,
                   dmlc::optional<Shape> axis = dmlc::optional<Shape>(),
                   NormOutDtype out_dtype = NormOutDtype::kNone,
                   bool keepdims = false) {
  static const char *NormOutDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64",
    "int32",
    "int64",
    "int8"
  };
  return Operator("norm")
           .SetParam("ord", ord)
           .SetParam("axis", axis)
           .SetParam("out_dtype", NormOutDtypeValues[int(out_dtype)])
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _equal(Symbol lhs,
                     Symbol rhs) {
  return Operator("_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _not_equal(Symbol lhs,
                         Symbol rhs) {
  return Operator("_not_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _greater(Symbol lhs,
                       Symbol rhs) {
  return Operator("_greater")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _greater_equal(Symbol lhs,
                             Symbol rhs) {
  return Operator("_greater_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _lesser(Symbol lhs,
                      Symbol rhs) {
  return Operator("_lesser")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _lesser_equal(Symbol lhs,
                            Symbol rhs) {
  return Operator("_lesser_equal")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _logical_and(Symbol lhs,
                           Symbol rhs) {
  return Operator("_logical_and")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _logical_or(Symbol lhs,
                          Symbol rhs) {
  return Operator("_logical_or")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief
 * \param lhs first input
 * \param rhs second input
 * \return new symbol
 */
inline Symbol _logical_xor(Symbol lhs,
                           Symbol rhs) {
  return Operator("_logical_xor")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Return the elements, either from x or y, depending on the condition.
 *
 *        Given three ndarrays, condition, x, and y, return an ndarray with the elements
 *        depending on the elements from condition are true or false. x and y must have
 *        If condition has the same shape as x, each element in the output array is from
 *        corresponding element in the condition is true, and from y if false.
 *
 *        If condition does not have the same shape as x, it must be a 1D array whose
 *        the same as x's first dimension size. Each row of the output array is from x's
 *        if the corresponding element from condition is true, and from y's row if false.
 *
 *        Note that all non-zero values are interpreted as ``True`` in condition.
 *
 *        Examples::
 *
 *        x = [[1, 2], [3, 4]]
 *        y = [[5, 6], [7, 8]]
 *        cond = [[0, 1], [-1, 0]]
 *
 *        where(cond, x, y) = [[5, 2], [3, 8]]
 *
 *        csr_cond = cast_storage(cond, 'csr')
 *
 *        where(csr_cond, x, y) = [[5, 2], [3, 8]]
 *
 *
 *
 *        Defined in src/operator/tensor/control_flow_op.cc:L57
 * \param condition condition array
 * \param x
 * \param y
 * \return new symbol
 */
inline Symbol where(Symbol condition,
                    Symbol x,
                    Symbol y) {
  return Operator("where")
           .SetInput("condition", condition)
           .SetInput("x", x)
           .SetInput("y", y)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _maximum_scalar(Symbol data,
                              mx_float scalar) {
  return Operator("_maximum_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _minimum_scalar(Symbol data,
                              mx_float scalar) {
  return Operator("_minimum_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _power_scalar(Symbol data,
                            mx_float scalar) {
  return Operator("_power_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _rpower_scalar(Symbol data,
                             mx_float scalar) {
  return Operator("_rpower_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _hypot_scalar(Symbol data,
                            mx_float scalar) {
  return Operator("_hypot_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Calculate Smooth L1 Loss(lhs, scalar) by summing
 *
 *        .. math::
 *
 *        f(x) =
 *        \begin{cases}
 *        (\sigma x)^2/2,& \text{if }x < 1/\sigma^2\\
 *        |x|-0.5/\sigma^2,& \text{otherwise}
 *        \end{cases}
 *
 *        where :math:`x` is an element of the tensor *lhs* and :math:`\sigma` is the
 *
 *        Example::
 *
 *        smooth_l1([1, 2, 3, 4]) = [0.5, 1.5, 2.5, 3.5]
 *        smooth_l1([1, 2, 3, 4], scalar=1) = [0.5, 1.5, 2.5, 3.5]
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_binary_scalar_op_extended.cc:L104
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol smooth_l1(Symbol data,
                        mx_float scalar) {
  return Operator("smooth_l1")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Reshapes the input array.
 *
 *        .. note:: ``Reshape`` is deprecated, use ``reshape``
 *
 *        Given an array and a shape, this function returns a copy of the array in the
 *        The shape is a tuple of integers such as (2,3,4). The size of the new shape
 *
 *        Example::
 *
 *        reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]]
 *
 *        Some dimensions of the shape can take special values from the set {0, -1, -2,
 *
 *        - ``0``  copy this dimension from the input to the output shape.
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2)
 *        - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4)
 *
 *        - ``-1`` infers the dimension of the output shape by using the remainder of the
 *        keeping the size of the new array same as that of the input array.
 *        At most one dimension of shape can be -1.
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4)
 *        - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8)
 *        - input shape = (2,3,4), shape=(-1,), output shape = (24,)
 *
 *        - ``-2`` copy all/remainder of the input dimensions to the output shape.
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4)
 *        - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4)
 *        - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1)
 *
 *        - ``-3`` use the product of two consecutive dimensions of the input shape as
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (-3,4), output shape = (6,4)
 *        - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20)
 *        - input shape = (2,3,4), shape = (0,-3), output shape = (2,12)
 *        - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4)
 *
 *        - ``-4`` split one dimension of the input into two dimensions passed subsequent
 *
 *        Example::
 *
 *        - input shape = (2,3,4), shape = (-4,1,2,-2), output shape =(1,2,3,4)
 *        - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4)
 *
 *        If the argument `reverse` is set to 1, then the special values are inferred
 *
 *        Example::
 *
 *        - without reverse=1, for input shape = (10,5,4), shape = (-1,0), output shape
 *        - with reverse=1, output shape will be (50,4).
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L202
 * \param data Input data to reshape.
 * \param shape The target shape
 * \param reverse If true then the special values are inferred from right to left
 * \param target_shape (Deprecated! Use ``shape`` instead.) Target new shape. One and
 * \param keep_highest (Deprecated! Use ``shape`` instead.) Whether keep the highest dim
 *        unchanged.If set to true, then the first dim in target_shape is ignored,and
 * \return new symbol
 */
inline Symbol Reshape(Symbol data,
                      Shape shape = {},
                      bool reverse = false,
                      Shape target_shape = {},
                      bool keep_highest = false) {
  return Operator("Reshape")
           .SetParam("shape", shape)
           .SetParam("reverse", reverse)
           .SetParam("target_shape", target_shape)
           .SetParam("keep_highest", keep_highest)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Flattens the input array into a 2-D array by collapsing the higher dimensions.
 *
 *        .. note:: `Flatten` is deprecated. Use `flatten` instead.
 *
 *        For an input array with shape ``(d1, d2, ..., dk)``, `flatten` operation
 *        the input array into an output array of shape ``(d1, d2*...*dk)``.
 *
 *        Note that the bahavior of this function is different from numpy.ndarray.flatten,
 *        which behaves similar to mxnet.ndarray.reshape((-1,)).
 *
 *        Example::
 *
 *        x = [[
 *        [1,2,3],
 *        [4,5,6],
 *        [7,8,9]
 *        ],
 *        [    [1,2,3],
 *        [4,5,6],
 *        [7,8,9]
 *        ]],
 *
 *        flatten(x) = [[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
 *        [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L291
 * \param data Input array.
 * \return new symbol
 */
inline Symbol Flatten(Symbol data) {
  return Operator("Flatten")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Permutes the dimensions of an array.
 *
 *        Examples::
 *
 *        x = [[ 1, 2],
 *        [ 3, 4]]
 *
 *        transpose(x) = [[ 1.,  3.],
 *        [ 2.,  4.]]
 *
 *        x = [[[ 1.,  2.],
 *        [ 3.,  4.]],
 *
 *        [[ 5.,  6.],
 *        [ 7.,  8.]]]
 *
 *        transpose(x) = [[[ 1.,  5.],
 *        [ 3.,  7.]],
 *
 *        [[ 2.,  6.],
 *        [ 4.,  8.]]]
 *
 *        transpose(x, axes=(1,0,2)) = [[[ 1.,  2.],
 *        [ 5.,  6.]],
 *
 *        [[ 3.,  4.],
 *        [ 7.,  8.]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L375
 * \param data Source input
 * \param axes Target axis order. By default the axes will be inverted.
 * \return new symbol
 */
inline Symbol transpose(Symbol data,
                        Shape axes = {}) {
  return Operator("transpose")
           .SetParam("axes", axes)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Inserts a new axis of size 1 into the array shape
 *
 *        For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)``
 *        will return a new array with shape ``(2,1,3,4)``.
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L416
 * \param data Source input
 * \param axis Position where new axis is to be inserted. Suppose that the input
 *        `NDArray`'s dimension is `ndim`, the range of the inserted axis is `[-ndim,
 * \return new symbol
 */
inline Symbol expand_dims(Symbol data,
                          int axis) {
  return Operator("expand_dims")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Slices a region of the array.
 *
 *        .. note:: ``crop`` is deprecated. Use ``slice`` instead.
 *
 *        This function returns a sliced array between the indices given
 *        by `begin` and `end` with the corresponding `step`.
 *
 *        For an input array of ``shape=(d_0, d_1, ..., d_n-1)``,
 *        slice operation with ``begin=(b_0, b_1...b_m-1)``,
 *        ``end=(e_0, e_1, ..., e_m-1)``, and ``step=(s_0, s_1, ..., s_m-1)``,
 *        where m <= n, results in an array with the shape
 *        ``(|e_0-b_0|/|s_0|, ..., |e_m-1-b_m-1|/|s_m-1|, d_m, ..., d_n-1)``.
 *
 *        The resulting array's *k*-th dimension contains elements
 *        from the *k*-th dimension of the input array starting
 *        from index ``b_k`` (inclusive) with step ``s_k``
 *        until reaching ``e_k`` (exclusive).
 *
 *        If the *k*-th elements are `None` in the sequence of `begin`, `end`,
 *        and `step`, the following rule will be used to set default values.
 *        If `s_k` is `None`, set `s_k=1`. If `s_k > 0`, set `b_k=0`, `e_k=d_k`;
 *        else, set `b_k=d_k-1`, `e_k=-1`.
 *
 *        The storage type of ``slice`` output depends on storage types of inputs
 *
 *        - slice(csr) = csr
 *        - otherwise, ``slice`` generates output with default storage
 *
 *        .. note:: When input data storage type is csr, it only supports
 *        step=(), or step=(None,), or step=(1,) to generate a csr output.
 *        For other step parameter values, it falls back to slicing
 *        a dense tensor.
 *
 *        Example::
 *
 *        x = [[  1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        slice(x, begin=(0,1), end=(2,4)) = [[ 2.,  3.,  4.],
 *        [ 6.,  7.,  8.]]
 *        slice(x, begin=(None, 0), end=(None, 3), step=(-1, 2)) = [[9., 11.],
 *        [5.,  7.],
 *        [1.,  3.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L506
 * \param data Source input
 * \param begin starting indices for the slice operation, supports negative indices.
 * \param end ending indices for the slice operation, supports negative indices.
 * \param step step for the slice operation, supports negative values.
 * \return new symbol
 */
inline Symbol slice(Symbol data,
                    Shape begin,
                    Shape end,
                    Shape step = {}) {
  return Operator("slice")
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetParam("step", step)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Assign the rhs to a cropped subset of lhs.
 *
 *        Requirements
 *        ------------
 *        - output should be explicitly given and be the same as lhs.
 *        - lhs and rhs are of the same data type, and on the same device.
 *
 *
 *        From:src/operator/tensor/matrix_op.cc:531
 * \param lhs Source input
 * \param rhs value to assign
 * \param begin starting indices for the slice operation, supports negative indices.
 * \param end ending indices for the slice operation, supports negative indices.
 * \param step step for the slice operation, supports negative values.
 * \return new symbol
 */
inline Symbol _slice_assign(Symbol lhs,
                            Symbol rhs,
                            Shape begin,
                            Shape end,
                            Shape step = {}) {
  return Operator("_slice_assign")
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetParam("step", step)
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief (Assign the scalar to a cropped subset of the input.
 *
 *        Requirements
 *        ------------
 *        - output should be explicitly given and be the same as input
 *        )
 *
 *        From:src/operator/tensor/matrix_op.cc:556
 * \param data Source input
 * \param begin starting indices for the slice operation, supports negative indices.
 * \param end ending indices for the slice operation, supports negative indices.
 * \param scalar The scalar value for assignment.
 * \param step step for the slice operation, supports negative values.
 * \return new symbol
 */
inline Symbol _slice_assign_scalar(Symbol data,
                                   Shape begin,
                                   Shape end,
                                   double scalar = 0,
                                   Shape step = {}) {
  return Operator("_slice_assign_scalar")
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetParam("scalar", scalar)
           .SetParam("step", step)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Slices along a given axis.
 *
 *        Returns an array slice along a given `axis` starting from the `begin` index
 *        to the `end` index.
 *
 *        Examples::
 *
 *        x = [[  1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        slice_axis(x, axis=0, begin=1, end=3) = [[  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        slice_axis(x, axis=1, begin=0, end=2) = [[  1.,   2.],
 *        [  5.,   6.],
 *        [  9.,  10.]]
 *
 *        slice_axis(x, axis=1, begin=-3, end=-1) = [[  2.,   3.],
 *        [  6.,   7.],
 *        [ 10.,  11.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L596
 * \param data Source input
 * \param axis Axis along which to be sliced, supports negative indexes.
 * \param begin The beginning index along the axis to be sliced,  supports negative
 * \param end The ending index along the axis to be sliced,  supports negative indexes.
 * \return new symbol
 */
inline Symbol slice_axis(Symbol data,
                         int axis,
                         int begin,
                         dmlc::optional<int> end) {
  return Operator("slice_axis")
           .SetParam("axis", axis)
           .SetParam("begin", begin)
           .SetParam("end", end)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Slices a region of the array like the shape of another array.
 *
 *        This function is similar to ``slice``, however, the `begin` are always `0`s
 *        and `end` of specific axes are inferred from the second input `shape_like`.
 *
 *        Given the second `shape_like` input of ``shape=(d_0, d_1, ..., d_n-1)``,
 *        a ``slice_like`` operator with default empty `axes`, it performs the
 *        following operation:
 *
 *        `` out = slice(input, begin=(0, 0, ..., 0), end=(d_0, d_1, ..., d_n-1))``.
 *
 *        When `axes` is not empty, it is used to speficy which axes are being sliced.
 *
 *        Given a 4-d input data, ``slice_like`` operator with ``axes=(0, 2, -1)``
 *        will perform the following operation:
 *
 *        `` out = slice(input, begin=(0, 0, 0, 0), end=(d_0, None, d_2, d_3))``.
 *
 *        Note that it is allowed to have first and second input with different
 *        however, you have to make sure the `axes` are specified and not exceeding the
 *        dimension limits.
 *
 *        For example, given `input_1` with ``shape=(2,3,4,5)`` and `input_2` with
 *        ``shape=(1,2,3)``, it is not allowed to use:
 *
 *        `` out = slice_like(a, b)`` because ndim of `input_1` is 4, and ndim of
 *        is 3.
 *
 *        The following is allowed in this situation:
 *
 *        `` out = slice_like(a, b, axes=(0, 2))``
 *
 *        Example::
 *
 *        x = [[  1.,   2.,   3.,   4.],
 *        [  5.,   6.,   7.,   8.],
 *        [  9.,  10.,  11.,  12.]]
 *
 *        y = [[  0.,   0.,   0.],
 *        [  0.,   0.,   0.]]
 *
 *        slice_like(x, y) = [[ 1.,  2.,  3.]
 *        [ 5.,  6.,  7.]]
 *        slice_like(x, y, axes=(0, 1)) = [[ 1.,  2.,  3.]
 *        [ 5.,  6.,  7.]]
 *        slice_like(x, y, axes=(0)) = [[ 1.,  2.,  3.,  4.]
 *        [ 5.,  6.,  7.,  8.]]
 *        slice_like(x, y, axes=(-1)) = [[  1.,   2.,   3.]
 *        [  5.,   6.,   7.]
 *        [  9.,  10.,  11.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L665
 * \param data Source input
 * \param shape_like Shape like input
 * \param axes List of axes on which input data will be sliced according to the
 *        corresponding size of the second input. By default will slice on all axes.
 * \return new symbol
 */
inline Symbol slice_like(Symbol data,
                         Symbol shape_like,
                         Shape axes = {}) {
  return Operator("slice_like")
           .SetParam("axes", axes)
           .SetInput("data", data)
           .SetInput("shape_like", shape_like)
           .CreateSymbol();
}

/*!
 * \brief Clips (limits) the values in an array.
 *
 *        Given an interval, values outside the interval are clipped to the interval
 *        Clipping ``x`` between `a_min` and `a_x` would be::
 *
 *        clip(x, a_min, a_max) = max(min(x, a_max), a_min))
 *
 *        Example::
 *
 *        x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 *
 *        clip(x,1,8) = [ 1.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.]
 *
 *        The storage type of ``clip`` output depends on storage types of inputs and the
 *        parameter values:
 *
 *        - clip(default) = default
 *        - clip(row_sparse, a_min <= 0, a_max >= 0) = row_sparse
 *        - clip(csr, a_min <= 0, a_max >= 0) = csr
 *        - clip(row_sparse, a_min < 0, a_max < 0) = default
 *        - clip(row_sparse, a_min > 0, a_max > 0) = default
 *        - clip(csr, a_min < 0, a_max < 0) = csr
 *        - clip(csr, a_min > 0, a_max > 0) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L723
 * \param data Input array.
 * \param a_min Minimum value
 * \param a_max Maximum value
 * \return new symbol
 */
inline Symbol clip(Symbol data,
                   mx_float a_min,
                   mx_float a_max) {
  return Operator("clip")
           .SetParam("a_min", a_min)
           .SetParam("a_max", a_max)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Repeats elements of an array.
 *
 *        By default, ``repeat`` flattens the input array into 1-D and then repeats the
 *        elements::
 *
 *        x = [[ 1, 2],
 *        [ 3, 4]]
 *
 *        repeat(x, repeats=2) = [ 1.,  1.,  2.,  2.,  3.,  3.,  4.,  4.]
 *
 *        The parameter ``axis`` specifies the axis along which to perform repeat::
 *
 *        repeat(x, repeats=2, axis=1) = [[ 1.,  1.,  2.,  2.],
 *        [ 3.,  3.,  4.,  4.]]
 *
 *        repeat(x, repeats=2, axis=0) = [[ 1.,  2.],
 *        [ 1.,  2.],
 *        [ 3.,  4.],
 *        [ 3.,  4.]]
 *
 *        repeat(x, repeats=2, axis=-1) = [[ 1.,  1.,  2.,  2.],
 *        [ 3.,  3.,  4.,  4.]]
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L796
 * \param data Input data array
 * \param repeats The number of repetitions for each element.
 * \param axis The axis along which to repeat values. The negative numbers are
 *        interpreted counting from the backward. By default, use the flattened input
 * \return new symbol
 */
inline Symbol repeat(Symbol data,
                     int repeats,
                     dmlc::optional<int> axis = dmlc::optional<int>()) {
  return Operator("repeat")
           .SetParam("repeats", repeats)
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Repeats the whole array multiple times.
 *
 *        If ``reps`` has length *d*, and input array has dimension of *n*. There are
 *        three cases:
 *
 *        - **n=d**. Repeat *i*-th dimension of the input by ``reps[i]`` times::
 *
 *        x = [[1, 2],
 *        [3, 4]]
 *
 *        tile(x, reps=(2,3)) = [[ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.],
 *        [ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.]]
 *
 *        - **n>d**. ``reps`` is promoted to length *n* by pre-pending 1's to it. Thus for
 *        an input shape ``(2,3)``, ``repos=(2,)`` is treated as ``(1,2)``::
 *
 *
 *        tile(x, reps=(2,)) = [[ 1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.]]
 *
 *        - **n<d**. The input is promoted to be d-dimensional by prepending new axes. So
 *        shape ``(2,2)`` array is promoted to ``(1,2,2)`` for 3-D replication::
 *
 *        tile(x, reps=(2,2,3)) = [[[ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.],
 *        [ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.]],
 *
 *        [[ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.],
 *        [ 1.,  2.,  1.,  2.,  1.,  2.],
 *        [ 3.,  4.,  3.,  4.,  3.,  4.]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L857
 * \param data Input data array
 * \param reps The number of times for repeating the tensor a. Each dim size of reps must
 *        be a positive integer. If reps has length d, the result will have dimension of
 *        max(d, a.ndim); If a.ndim < d, a is promoted to be d-dimensional by prepending
 * \return new symbol
 */
inline Symbol tile(Symbol data,
                   Shape reps) {
  return Operator("tile")
           .SetParam("reps", reps)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Reverses the order of elements along given axis while preserving array shape.
 *
 *        Note: reverse and flip are equivalent. We use reverse in the following examples.
 *
 *        Examples::
 *
 *        x = [[ 0.,  1.,  2.,  3.,  4.],
 *        [ 5.,  6.,  7.,  8.,  9.]]
 *
 *        reverse(x, axis=0) = [[ 5.,  6.,  7.,  8.,  9.],
 *        [ 0.,  1.,  2.,  3.,  4.]]
 *
 *        reverse(x, axis=1) = [[ 4.,  3.,  2.,  1.,  0.],
 *        [ 9.,  8.,  7.,  6.,  5.]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L898
 * \param data Input data array
 * \param axis The axis which to reverse elements.
 * \return new symbol
 */
inline Symbol reverse(Symbol data,
                      Shape axis) {
  return Operator("reverse")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Join a sequence of arrays along a new axis.
 *
 *        The axis parameter specifies the index of the new axis in the dimensions of the
 *        result. For example, if axis=0 it will be the first dimension and if axis=-1 it
 *        will be the last dimension.
 *
 *        Examples::
 *
 *        x = [1, 2]
 *        y = [3, 4]
 *
 *        stack(x, y) = [[1, 2],
 *        [3, 4]]
 *        stack(x, y, axis=1) = [[1, 3],
 *        [2, 4]]
 *
 * \param data List of arrays to stack
 * \param num_args Number of inputs to be stacked.
 * \param axis The axis in the result array along which the input arrays are stacked.
 * \return new symbol
 */
inline Symbol stack(const std::vector<Symbol>& data,
                    int num_args,
                    int axis = 0) {
  return Operator("stack")
           .SetParam("num_args", num_args)
           .SetParam("axis", axis)
(data)
           .CreateSymbol();
}

/*!
 * \brief Remove single-dimensional entries from the shape of an array.
 *        Same behavior of defining the output tensor shape as numpy.squeeze for the most
 *        See the following note for exception.
 *
 *        Examples::
 *
 *        data = [[[0], [1], [2]]]
 *        squeeze(data) = [0, 1, 2]
 *        squeeze(data, axis=0) = [[0], [1], [2]]
 *        squeeze(data, axis=2) = [[0, 1, 2]]
 *        squeeze(data, axis=(0, 2)) = [0, 1, 2]
 *
 *        .. Note::
 *        The output of this operator will keep at least one dimension not removed. For
 *        squeeze([[[4]]]) = [4], while in numpy.squeeze, the output will become a scalar.
 *
 * \param data data to squeeze
 * \param axis Selects a subset of the single-dimensional entries in the shape. If an
 * \return new symbol
 */
inline Symbol squeeze(const std::vector<Symbol>& data,
                      dmlc::optional<Shape> axis = dmlc::optional<Shape>()) {
  return Operator("squeeze")
           .SetParam("axis", axis)
(data)
           .CreateSymbol();
}

/*!
 * \brief Rearranges(permutes) data from depth into blocks of spatial data.
 *        Similar to ONNX DepthToSpace operator:
 *        https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace.
 *        The output is a new tensor where the values from depth dimension are moved in
 *        to height and width dimension. The reverse of this operation is
 *
 *        .. math::
 *
 *        \begin{gather*}
 *        x \prime = reshape(x, [N, block\_size, block\_size, C / (block\_size ^ 2), H *
 *        x \prime \prime = transpose(x \prime, [0, 3, 4, 1, 5, 2]) \\
 *        y = reshape(x \prime \prime, [N, C / (block\_size ^ 2), H * block\_size, W *
 *        \end{gather*}
 *
 *        where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`:
 *        and :math:`y` is the output tensor of layout :math:`[N, C / (block\_size ^ 2),
 *
 *        Example::
 *
 *        x = [[[[0, 1, 2],
 *        [3, 4, 5]],
 *        [[6, 7, 8],
 *        [9, 10, 11]],
 *        [[12, 13, 14],
 *        [15, 16, 17]],
 *        [[18, 19, 20],
 *        [21, 22, 23]]]]
 *
 *        depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8],
 *        [12, 18, 13, 19, 14, 20],
 *        [3, 9, 4, 10, 5, 11],
 *        [15, 21, 16, 22, 17, 23]]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L1050
 * \param data Input ndarray
 * \param block_size Blocks of [block_size. block_size] are moved
 * \return new symbol
 */
inline Symbol depth_to_space(Symbol data,
                             int block_size) {
  return Operator("depth_to_space")
           .SetParam("block_size", block_size)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Rearranges(permutes) blocks of spatial data into depth.
 *        Similar to ONNX SpaceToDepth operator:
 *        https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth
 *
 *        The output is a new tensor where the values from height and width dimension are
 *        moved to the depth dimension. The reverse of this operation is
 *
 *        .. math::
 *
 *        \begin{gather*}
 *        x \prime = reshape(x, [N, C, H / block\_size, block\_size, W / block\_size,
 *        x \prime \prime = transpose(x \prime, [0, 3, 5, 1, 2, 4]) \\
 *        y = reshape(x \prime \prime, [N, C * (block\_size ^ 2), H / block\_size, W /
 *        \end{gather*}
 *
 *        where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`:
 *        and :math:`y` is the output tensor of layout :math:`[N, C * (block\_size ^ 2),
 *
 *        Example::
 *
 *        x = [[[[0, 6, 1, 7, 2, 8],
 *        [12, 18, 13, 19, 14, 20],
 *        [3, 9, 4, 10, 5, 11],
 *        [15, 21, 16, 22, 17, 23]]]]
 *
 *
 *        space_to_depth(x, 2) = [[[[0, 1, 2],
 *        [3, 4, 5]],
 *        [[6, 7, 8],
 *        [9, 10, 11]],
 *        [[12, 13, 14],
 *        [15, 16, 17]],
 *        [[18, 19, 20],
 *        [21, 22, 23]]]]
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L1104
 * \param data Input ndarray
 * \param block_size Blocks of [block_size. block_size] are moved
 * \return new symbol
 */
inline Symbol space_to_depth(Symbol data,
                             int block_size) {
  return Operator("space_to_depth")
           .SetParam("block_size", block_size)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Splits an array along a particular axis into multiple sub-arrays.
 *
 *        Example::
 *
 *        x  = [[[ 1.]
 *        [ 2.]]
 *        [[ 3.]
 *        [ 4.]]
 *        [[ 5.]
 *        [ 6.]]]
 *        x.shape = (3, 2, 1)
 *
 *        y = split_v2(x, axis=1, indices_or_sections=2) // a list of 2 arrays with shape
 *        y = [[[ 1.]]
 *        [[ 3.]]
 *        [[ 5.]]]
 *
 *        [[[ 2.]]
 *        [[ 4.]]
 *        [[ 6.]]]
 *
 *        y[0].shape = (3, 1, 1)
 *
 *        z = split_v2(x, axis=0, indices_or_sections=3) // a list of 3 arrays with shape
 *        z = [[[ 1.]
 *        [ 2.]]]
 *
 *        [[[ 3.]
 *        [ 4.]]]
 *
 *        [[[ 5.]
 *        [ 6.]]]
 *
 *        z[0].shape = (1, 2, 1)
 *
 *        w = split_v2(x, axis=0, indices_or_sections=(1,)) // a list of 2 arrays with
 *        w = [[[ 1.]
 *        [ 2.]]]
 *
 *        [[[3.]
 *        [4.]]
 *
 *        [[5.]
 *        [6.]]]
 *
 *        w[0].shape = (1, 2, 1)
 *        w[1].shape = (2, 2, 1)
 *
 *        `squeeze_axis=True` removes the axis with length 1 from the shapes of the
 *        **Note** that setting `squeeze_axis` to ``1`` removes axis with length 1 only
 *        along the `axis` which it is split.
 *        Also `squeeze_axis` can be set to true only if ``input.shape[axis] ==
 *
 *        Example::
 *
 *        z = split_v2(x, axis=0, indices_or_sections=3, squeeze_axis=1) // a list of 3
 *        z = [[ 1.]
 *        [ 2.]]
 *
 *        [[ 3.]
 *        [ 4.]]
 *
 *        [[ 5.]
 *        [ 6.]]
 *        z[0].shape = (2, 1)
 *
 *
 *
 *        Defined in src/operator/tensor/matrix_op.cc:L1190
 * \param data The input
 * \param indices Indices of splits. The elements should denote the boundaries of at
 * \param axis Axis along which to split.
 * \param squeeze_axis If true, Removes the axis with length 1 from the shapes of the
 *        output arrays. **Note** that setting `squeeze_axis` to ``true`` removes axis
 *        with length 1 only along the `axis` which it is split. Also `squeeze_axis` can
 * \param sections Number of sections if equally splitted. Default to 0 which means split
 * \return new symbol
 */
inline Symbol _split_v2(Symbol data,
                        Shape indices,
                        int axis = 1,
                        bool squeeze_axis = false,
                        int sections = 0) {
  return Operator("_split_v2")
           .SetParam("indices", indices)
           .SetParam("axis", axis)
           .SetParam("squeeze_axis", squeeze_axis)
           .SetParam("sections", sections)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \return new symbol
 */
inline Symbol _split_v2_backward() {
  return Operator("_split_v2_backward")
           .CreateSymbol();
}

/*!
 * \brief Casts tensor storage type to the new type.
 *
 *        When an NDArray with default storage type is cast to csr or row_sparse storage,
 *        the result is compact, which means:
 *
 *        - for csr, zero values will not be retained
 *        - for row_sparse, row slices of all zeros will not be retained
 *
 *        The storage type of ``cast_storage`` output depends on stype parameter:
 *
 *        - cast_storage(csr, 'default') = default
 *        - cast_storage(row_sparse, 'default') = default
 *        - cast_storage(default, 'csr') = csr
 *        - cast_storage(default, 'row_sparse') = row_sparse
 *        - cast_storage(csr, 'csr') = csr
 *        - cast_storage(row_sparse, 'row_sparse') = row_sparse
 *
 *        Example::
 *
 *        dense = [[ 0.,  1.,  0.],
 *        [ 2.,  0.,  3.],
 *        [ 0.,  0.,  0.],
 *        [ 0.,  0.,  0.]]
 *
 *        # cast to row_sparse storage type
 *        rsp = cast_storage(dense, 'row_sparse')
 *        rsp.indices = [0, 1]
 *        rsp.values = [[ 0.,  1.,  0.],
 *        [ 2.,  0.,  3.]]
 *
 *        # cast to csr storage type
 *        csr = cast_storage(dense, 'csr')
 *        csr.indices = [1, 0, 2]
 *        csr.values = [ 1.,  2.,  3.]
 *        csr.indptr = [0, 1, 3, 3, 3]
 *
 *
 *
 *        Defined in src/operator/tensor/cast_storage.cc:L71
 * \param data The input.
 * \param stype Output storage type.
 * \return new symbol
 */
inline Symbol cast_storage(Symbol data,
                           Cast_storageStype stype) {
  static const char *Cast_storageStypeValues[] = {
    "csr",
    "default",
    "row_sparse"
  };
  return Operator("cast_storage")
           .SetParam("stype", Cast_storageStypeValues[int(stype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Converts a batch of index arrays into an array of flat indices. The operator
 *        follows numpy conventions so a single multi index is given by a column of the
 *        input matrix. The leading dimension may be left unspecified by using -1 as
 *
 *        Examples::
 *
 *        A = [[3,6,6],[4,5,1]]
 *        ravel(A, shape=(7,6)) = [22,41,37]
 *        ravel(A, shape=(-1,6)) = [22,41,37]
 *
 *
 *
 *        Defined in src/operator/tensor/ravel.cc:L42
 * \param data Batch of multi-indices
 * \param shape Shape of the array into which the multi-indices apply.
 * \return new symbol
 */
inline Symbol _ravel_multi_index(Symbol data,
                                 Shape shape = Shape()) {
  return Operator("_ravel_multi_index")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Converts an array of flat indices into a batch of index arrays. The operator
 *        follows numpy conventions so a single multi index is given by a column of the
 *        output matrix. The leading dimension may be left unspecified by using -1 as
 *
 *        Examples::
 *
 *        A = [22,41,37]
 *        unravel(A, shape=(7,6)) = [[3,6,6],[4,5,1]]
 *        unravel(A, shape=(-1,6)) = [[3,6,6],[4,5,1]]
 *
 *
 *
 *        Defined in src/operator/tensor/ravel.cc:L67
 * \param data Array of flat indices
 * \param shape Shape of the array into which the multi-indices apply.
 * \return new symbol
 */
inline Symbol _unravel_index(Symbol data,
                             Shape shape = Shape()) {
  return Operator("_unravel_index")
           .SetParam("shape", shape)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _equal_scalar(Symbol data,
                            mx_float scalar) {
  return Operator("_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _not_equal_scalar(Symbol data,
                                mx_float scalar) {
  return Operator("_not_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _greater_scalar(Symbol data,
                              mx_float scalar) {
  return Operator("_greater_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _greater_equal_scalar(Symbol data,
                                    mx_float scalar) {
  return Operator("_greater_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _lesser_scalar(Symbol data,
                             mx_float scalar) {
  return Operator("_lesser_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _lesser_equal_scalar(Symbol data,
                                   mx_float scalar) {
  return Operator("_lesser_equal_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _logical_and_scalar(Symbol data,
                                  mx_float scalar) {
  return Operator("_logical_and_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _logical_or_scalar(Symbol data,
                                 mx_float scalar) {
  return Operator("_logical_or_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _logical_xor_scalar(Symbol data,
                                  mx_float scalar) {
  return Operator("_logical_xor_scalar")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the element-wise sine of the input array.
 *
 *        The input should be in radians (:math:`2\pi` rad equals 360 degrees).
 *
 *        .. math::
 *        sin([0, \pi/4, \pi/2]) = [0, 0.707, 1]
 *
 *        The storage type of ``sin`` output depends upon the input storage type:
 *
 *        - sin(default) = default
 *        - sin(row_sparse) = row_sparse
 *        - sin(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L46
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sin(Symbol data) {
  return Operator("sin")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the element-wise cosine of the input array.
 *
 *        The input should be in radians (:math:`2\pi` rad equals 360 degrees).
 *
 *        .. math::
 *        cos([0, \pi/4, \pi/2]) = [1, 0.707, 0]
 *
 *        The storage type of ``cos`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L89
 * \param data The input array.
 * \return new symbol
 */
inline Symbol cos(Symbol data) {
  return Operator("cos")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the element-wise tangent of the input array.
 *
 *        The input should be in radians (:math:`2\pi` rad equals 360 degrees).
 *
 *        .. math::
 *        tan([0, \pi/4, \pi/2]) = [0, 1, -inf]
 *
 *        The storage type of ``tan`` output depends upon the input storage type:
 *
 *        - tan(default) = default
 *        - tan(row_sparse) = row_sparse
 *        - tan(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L139
 * \param data The input array.
 * \return new symbol
 */
inline Symbol tan(Symbol data) {
  return Operator("tan")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise inverse sine of the input array.
 *
 *        The input should be in the range `[-1, 1]`.
 *        The output is in the closed interval of [:math:`-\pi/2`, :math:`\pi/2`].
 *
 *        .. math::
 *        arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2]
 *
 *        The storage type of ``arcsin`` output depends upon the input storage type:
 *
 *        - arcsin(default) = default
 *        - arcsin(row_sparse) = row_sparse
 *        - arcsin(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L160
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arcsin(Symbol data) {
  return Operator("arcsin")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise inverse cosine of the input array.
 *
 *        The input should be in range `[-1, 1]`.
 *        The output is in the closed interval :math:`[0, \pi]`
 *
 *        .. math::
 *        arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0]
 *
 *        The storage type of ``arccos`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L179
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arccos(Symbol data) {
  return Operator("arccos")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns element-wise inverse tangent of the input array.
 *
 *        The output is in the closed interval :math:`[-\pi/2, \pi/2]`
 *
 *        .. math::
 *        arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4]
 *
 *        The storage type of ``arctan`` output depends upon the input storage type:
 *
 *        - arctan(default) = default
 *        - arctan(row_sparse) = row_sparse
 *        - arctan(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L200
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arctan(Symbol data) {
  return Operator("arctan")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Converts each element of the input array from radians to degrees.
 *
 *        .. math::
 *        degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360]
 *
 *        The storage type of ``degrees`` output depends upon the input storage type:
 *
 *        - degrees(default) = default
 *        - degrees(row_sparse) = row_sparse
 *        - degrees(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L219
 * \param data The input array.
 * \return new symbol
 */
inline Symbol degrees(Symbol data) {
  return Operator("degrees")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Converts each element of the input array from degrees to radians.
 *
 *        .. math::
 *        radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi]
 *
 *        The storage type of ``radians`` output depends upon the input storage type:
 *
 *        - radians(default) = default
 *        - radians(row_sparse) = row_sparse
 *        - radians(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L238
 * \param data The input array.
 * \return new symbol
 */
inline Symbol radians(Symbol data) {
  return Operator("radians")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the hyperbolic sine of the input array, computed element-wise.
 *
 *        .. math::
 *        sinh(x) = 0.5\times(exp(x) - exp(-x))
 *
 *        The storage type of ``sinh`` output depends upon the input storage type:
 *
 *        - sinh(default) = default
 *        - sinh(row_sparse) = row_sparse
 *        - sinh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L257
 * \param data The input array.
 * \return new symbol
 */
inline Symbol sinh(Symbol data) {
  return Operator("sinh")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the hyperbolic cosine  of the input array, computed element-wise.
 *
 *        .. math::
 *        cosh(x) = 0.5\times(exp(x) + exp(-x))
 *
 *        The storage type of ``cosh`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L272
 * \param data The input array.
 * \return new symbol
 */
inline Symbol cosh(Symbol data) {
  return Operator("cosh")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the hyperbolic tangent of the input array, computed element-wise.
 *
 *        .. math::
 *        tanh(x) = sinh(x) / cosh(x)
 *
 *        The storage type of ``tanh`` output depends upon the input storage type:
 *
 *        - tanh(default) = default
 *        - tanh(row_sparse) = row_sparse
 *        - tanh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L290
 * \param data The input array.
 * \return new symbol
 */
inline Symbol tanh(Symbol data) {
  return Operator("tanh")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the element-wise inverse hyperbolic sine of the input array, \
 *        computed element-wise.
 *
 *        The storage type of ``arcsinh`` output depends upon the input storage type:
 *
 *        - arcsinh(default) = default
 *        - arcsinh(row_sparse) = row_sparse
 *        - arcsinh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L306
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arcsinh(Symbol data) {
  return Operator("arcsinh")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the element-wise inverse hyperbolic cosine of the input array, \
 *        computed element-wise.
 *
 *        The storage type of ``arccosh`` output is always dense
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L320
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arccosh(Symbol data) {
  return Operator("arccosh")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns the element-wise inverse hyperbolic tangent of the input array, \
 *        computed element-wise.
 *
 *        The storage type of ``arctanh`` output depends upon the input storage type:
 *
 *        - arctanh(default) = default
 *        - arctanh(row_sparse) = row_sparse
 *        - arctanh(csr) = csr
 *
 *
 *
 *        Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L337
 * \param data The input array.
 * \return new symbol
 */
inline Symbol arctanh(Symbol data) {
  return Operator("arctanh")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple multinomial distributions.
 *
 *        *data* is an *n* dimensional array whose last dimension has length *k*, where
 *        *k* is the number of possible outcomes of each multinomial distribution. This
 *        operator will draw *shape* samples from each distribution. If shape is empty
 *        one sample will be drawn from each distribution.
 *
 *        If *get_prob* is true, a second array containing log likelihood of the drawn
 *        samples will also be returned. This is usually used for reinforcement learning
 *        where you can provide reward as head gradient for this array to estimate
 *        gradient.
 *
 *        Note that the input distribution must be normalized, i.e. *data* must sum to
 *        1 along its last axis.
 *
 *        Examples::
 *
 *        probs = [[0, 0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1, 0]]
 *
 *        // Draw a single sample for each distribution
 *        sample_multinomial(probs) = [3, 0]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_multinomial(probs, shape=(2)) = [[4, 2],
 *        [0, 0]]
 *
 *        // requests log likelihood
 *        sample_multinomial(probs, get_prob=True) = [2, 1], [0.2, 0.3]
 *
 * \param data Distribution probabilities. Must sum to one on the last axis.
 * \param shape Shape to be sampled from each random distribution.
 * \param get_prob Whether to also return the log probability of sampled result. This is
 *        usually used for differentiating through stochastic variables, e.g. in
 * \param dtype DType of the output in case this can't be inferred.
 * \return new symbol
 */
inline Symbol _sample_multinomial(Symbol data,
                                  Shape shape = {},
                                  bool get_prob = false,
                                  _sample_multinomialDtype dtype = _sample_multinomialDtype::kInt32) {
  static const char *_sample_multinomialDtypeValues[] = {
    "float16",
    "float32",
    "float64",
    "int32",
    "uint8"
  };
  return Operator("_sample_multinomial")
           .SetParam("shape", shape)
           .SetParam("get_prob", get_prob)
           .SetParam("dtype", _sample_multinomialDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple
 *        uniform distributions on the intervals given by *[low,high)*.
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Examples::
 *
 *        low = [ 0.0, 2.5 ]
 *        high = [ 1.0, 3.7 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_uniform(low, high) = [ 0.40451524,  3.18687344]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_uniform(low, high, shape=(2)) = [[ 0.40451524,  0.18017688],
 *        [ 3.18687344,  3.68352246]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L276
 * \param low Lower bounds of the distributions.
 * \param high Upper bounds of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_uniform(Symbol low,
                              Symbol high,
                              Shape shape = Shape(),
                              _sample_uniformDtype dtype = _sample_uniformDtype::kNone) {
  static const char *_sample_uniformDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_uniform")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_uniformDtypeValues[int(dtype)])
           .SetInput("low", low)
           .SetInput("high", high)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple
 *        normal distributions with parameters *mu* (mean) and *sigma* (standard
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Examples::
 *
 *        mu = [ 0.0, 2.5 ]
 *        sigma = [ 1.0, 3.7 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_normal(mu, sigma) = [-0.56410581,  0.95934606]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_normal(mu, sigma, shape=(2)) = [[-0.56410581,  0.2928229 ],
 *        [ 0.95934606,  4.48287058]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L278
 * \param mu Means of the distributions.
 * \param sigma Standard deviations of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_normal(Symbol mu,
                             Symbol sigma,
                             Shape shape = Shape(),
                             _sample_normalDtype dtype = _sample_normalDtype::kNone) {
  static const char *_sample_normalDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_normal")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_normalDtypeValues[int(dtype)])
           .SetInput("mu", mu)
           .SetInput("sigma", sigma)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple
 *        gamma distributions with parameters *alpha* (shape) and *beta* (scale).
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Examples::
 *
 *        alpha = [ 0.0, 2.5 ]
 *        beta = [ 1.0, 0.7 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_gamma(alpha, beta) = [ 0.        ,  2.25797319]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_gamma(alpha, beta, shape=(2)) = [[ 0.        ,  0.        ],
 *        [ 2.25797319,  1.70734084]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L280
 * \param alpha Alpha (shape) parameters of the distributions.
 * \param beta Beta (scale) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_gamma(Symbol alpha,
                            Symbol beta,
                            Shape shape = Shape(),
                            _sample_gammaDtype dtype = _sample_gammaDtype::kNone) {
  static const char *_sample_gammaDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_gamma")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_gammaDtypeValues[int(dtype)])
           .SetInput("alpha", alpha)
           .SetInput("beta", beta)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple
 *        exponential distributions with parameters lambda (rate).
 *
 *        The parameters of the distributions are provided as an input array.
 *        Let *[s]* be the shape of the input array, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input array,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input value at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input array.
 *
 *        Examples::
 *
 *        lam = [ 1.0, 8.5 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_exponential(lam) = [ 0.51837951,  0.09994757]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_exponential(lam, shape=(2)) = [[ 0.51837951,  0.19866663],
 *        [ 0.09994757,  0.50447971]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L283
 * \param lam Lambda (rate) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_exponential(Symbol lam,
                                  Shape shape = Shape(),
                                  _sample_exponentialDtype dtype = _sample_exponentialDtype::kNone) {
  static const char *_sample_exponentialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_exponential")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_exponentialDtypeValues[int(dtype)])
           .SetInput("lam", lam)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple
 *        Poisson distributions with parameters lambda (rate).
 *
 *        The parameters of the distributions are provided as an input array.
 *        Let *[s]* be the shape of the input array, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input array,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input value at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input array.
 *
 *        Samples will always be returned as a floating point data type.
 *
 *        Examples::
 *
 *        lam = [ 1.0, 8.5 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_poisson(lam) = [  0.,  13.]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_poisson(lam, shape=(2)) = [[  0.,   4.],
 *        [ 13.,   8.]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L285
 * \param lam Lambda (rate) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_poisson(Symbol lam,
                              Shape shape = Shape(),
                              _sample_poissonDtype dtype = _sample_poissonDtype::kNone) {
  static const char *_sample_poissonDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_poisson")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_poissonDtypeValues[int(dtype)])
           .SetInput("lam", lam)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple
 *        negative binomial distributions with parameters *k* (failure limit) and *p*
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Samples will always be returned as a floating point data type.
 *
 *        Examples::
 *
 *        k = [ 20, 49 ]
 *        p = [ 0.4 , 0.77 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_negative_binomial(k, p) = [ 15.,  16.]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_negative_binomial(k, p, shape=(2)) = [[ 15.,  50.],
 *        [ 16.,  12.]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L287
 * \param k Limits of unsuccessful experiments.
 * \param p Failure probabilities in each experiment.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_negative_binomial(Symbol k,
                                        Symbol p,
                                        Shape shape = Shape(),
                                        _sample_negative_binomialDtype dtype = _sample_negative_binomialDtype::kNone) {
  static const char *_sample_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_negative_binomial")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_negative_binomialDtypeValues[int(dtype)])
           .SetInput("k", k)
           .SetInput("p", p)
           .CreateSymbol();
}

/*!
 * \brief Concurrent sampling from multiple
 *        generalized negative binomial distributions with parameters *mu* (mean) and
 *
 *        The parameters of the distributions are provided as input arrays.
 *        Let *[s]* be the shape of the input arrays, *n* be the dimension of *[s]*, *[t]*
 *        be the shape specified as the parameter of the operator, and *m* be the
 *        of *[t]*. Then the output will be a *(n+m)*-dimensional array with shape
 *
 *        For any valid *n*-dimensional index *i* with respect to the input arrays,
 *        will be an *m*-dimensional array that holds randomly drawn samples from the
 *        which is parameterized by the input values at index *i*. If the shape parameter
 *        operator is not set, then one sample will be drawn per distribution and the
 *        has the same shape as the input arrays.
 *
 *        Samples will always be returned as a floating point data type.
 *
 *        Examples::
 *
 *        mu = [ 2.0, 2.5 ]
 *        alpha = [ 1.0, 0.1 ]
 *
 *        // Draw a single sample for each distribution
 *        sample_generalized_negative_binomial(mu, alpha) = [ 0.,  3.]
 *
 *        // Draw a vector containing two samples for each distribution
 *        sample_generalized_negative_binomial(mu, alpha, shape=(2)) = [[ 0.,  3.],
 *        [ 3.,  1.]]
 *
 *
 *        Defined in src/operator/random/multisample_op.cc:L290
 * \param mu Means of the distributions.
 * \param alpha Alpha (dispersion) parameters of the distributions.
 * \param shape Shape to be sampled from each random distribution.
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _sample_generalized_negative_binomial(Symbol mu,
                                                    Symbol alpha,
                                                    Shape shape = Shape(),
                                                    _sample_generalized_negative_binomialDtype dtype = _sample_generalized_negative_binomialDtype::kNone) {
  static const char *_sample_generalized_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_sample_generalized_negative_binomial")
           .SetParam("shape", shape)
           .SetParam("dtype", _sample_generalized_negative_binomialDtypeValues[int(dtype)])
           .SetInput("mu", mu)
           .SetInput("alpha", alpha)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from an an approximately log-uniform
 *        or Zipfian distribution without replacement.
 *
 *        This operation takes a 2-D shape `(batch_size, num_sampled)`,
 *        and randomly generates *num_sampled* samples from the range of integers [0,
 *        for each instance in the batch.
 *
 *        The elements in each instance are drawn without replacement from the base
 *        The base distribution for this operator is an approximately log-uniform or
 *
 *        P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
 *
 *        Additionaly, it also returns the number of trials used to obtain `num_sampled`
 *        each instance in the batch.
 *
 *        Example::
 *
 *        samples, trials = _sample_unique_zipfian(750000, shape=(4, 8192))
 *        unique(samples[0]) = 8192
 *        unique(samples[3]) = 8192
 *        trials[0] = 16435
 *
 *
 *
 *        Defined in src/operator/random/unique_sample_op.cc:L66
 * \param range_max The number of possible classes.
 * \param shape 2-D shape of the output, where shape[0] is the batch size, and shape[1]
 * \return new symbol
 */
inline Symbol _sample_unique_zipfian(int range_max,
                                     Shape shape = Shape()) {
  return Operator("_sample_unique_zipfian")
           .SetParam("range_max", range_max)
           .SetParam("shape", shape)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a uniform distribution.
 *
 *        .. note:: The existing alias ``uniform`` is deprecated.
 *
 *        Samples are uniformly distributed over the half-open interval *[low, high)*
 *        (includes *low*, but excludes *high*).
 *
 *        Example::
 *
 *        uniform(low=0, high=1, shape=(2,2)) = [[ 0.60276335,  0.85794562],
 *        [ 0.54488319,  0.84725171]]
 *
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L96
 * \param low Lower bound of the distribution.
 * \param high Upper bound of the distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_uniform(mx_float low = 0,
                              mx_float high = 1,
                              Shape shape = Shape(),
                              const std::string& ctx = "",
                              _random_uniformDtype dtype = _random_uniformDtype::kNone) {
  static const char *_random_uniformDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_uniform")
           .SetParam("low", low)
           .SetParam("high", high)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_uniformDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a normal (Gaussian) distribution.
 *
 *        .. note:: The existing alias ``normal`` is deprecated.
 *
 *        Samples are distributed according to a normal distribution parametrized by
 *        (standard deviation).
 *
 *        Example::
 *
 *        normal(loc=0, scale=1, shape=(2,2)) = [[ 1.89171135, -1.16881478],
 *        [-1.23474145,  1.55807114]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L113
 * \param loc Mean of the distribution.
 * \param scale Standard deviation of the distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_normal(mx_float loc = 0,
                             mx_float scale = 1,
                             Shape shape = Shape(),
                             const std::string& ctx = "",
                             _random_normalDtype dtype = _random_normalDtype::kNone) {
  static const char *_random_normalDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_normal")
           .SetParam("loc", loc)
           .SetParam("scale", scale)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_normalDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a gamma distribution.
 *
 *        Samples are distributed according to a gamma distribution parametrized by
 *
 *        Example::
 *
 *        gamma(alpha=9, beta=0.5, shape=(2,2)) = [[ 7.10486984,  3.37695289],
 *        [ 3.91697288,  3.65933681]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L125
 * \param alpha Alpha parameter (shape) of the gamma distribution.
 * \param beta Beta parameter (scale) of the gamma distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_gamma(mx_float alpha = 1,
                            mx_float beta = 1,
                            Shape shape = Shape(),
                            const std::string& ctx = "",
                            _random_gammaDtype dtype = _random_gammaDtype::kNone) {
  static const char *_random_gammaDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_gamma")
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_gammaDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from an exponential distribution.
 *
 *        Samples are distributed according to an exponential distribution parametrized
 *
 *        Example::
 *
 *        exponential(lam=4, shape=(2,2)) = [[ 0.0097189 ,  0.08999364],
 *        [ 0.04146638,  0.31715935]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L137
 * \param lam Lambda parameter (rate) of the exponential distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_exponential(mx_float lam = 1,
                                  Shape shape = Shape(),
                                  const std::string& ctx = "",
                                  _random_exponentialDtype dtype = _random_exponentialDtype::kNone) {
  static const char *_random_exponentialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_exponential")
           .SetParam("lam", lam)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_exponentialDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a Poisson distribution.
 *
 *        Samples are distributed according to a Poisson distribution parametrized by
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        poisson(lam=4, shape=(2,2)) = [[ 5.,  2.],
 *        [ 4.,  6.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L150
 * \param lam Lambda parameter (rate) of the Poisson distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_poisson(mx_float lam = 1,
                              Shape shape = Shape(),
                              const std::string& ctx = "",
                              _random_poissonDtype dtype = _random_poissonDtype::kNone) {
  static const char *_random_poissonDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_poisson")
           .SetParam("lam", lam)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_poissonDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a negative binomial distribution.
 *
 *        Samples are distributed according to a negative binomial distribution
 *        *k* (limit of unsuccessful experiments) and *p* (failure probability in each
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        negative_binomial(k=3, p=0.4, shape=(2,2)) = [[ 4.,  7.],
 *        [ 2.,  5.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L164
 * \param k Limit of unsuccessful experiments.
 * \param p Failure probability in each experiment.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_negative_binomial(int k = 1,
                                        mx_float p = 1,
                                        Shape shape = Shape(),
                                        const std::string& ctx = "",
                                        _random_negative_binomialDtype dtype = _random_negative_binomialDtype::kNone) {
  static const char *_random_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_negative_binomial")
           .SetParam("k", k)
           .SetParam("p", p)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_negative_binomialDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a generalized negative binomial distribution.
 *
 *        Samples are distributed according to a generalized negative binomial
 *        *mu* (mean) and *alpha* (dispersion). *alpha* is defined as *1/k* where *k* is
 *        number of unsuccessful experiments (generalized to real numbers).
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        generalized_negative_binomial(mu=2.0, alpha=0.3, shape=(2,2)) = [[ 2.,  1.],
 *        [ 6.,  4.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L179
 * \param mu Mean of the negative binomial distribution.
 * \param alpha Alpha (dispersion) parameter of the negative binomial distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to float32
 * \return new symbol
 */
inline Symbol _random_generalized_negative_binomial(mx_float mu = 1,
                                                    mx_float alpha = 1,
                                                    Shape shape = Shape(),
                                                    const std::string& ctx = "",
                                                    _random_generalized_negative_binomialDtype dtype = _random_generalized_negative_binomialDtype::kNone) {
  static const char *_random_generalized_negative_binomialDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("_random_generalized_negative_binomial")
           .SetParam("mu", mu)
           .SetParam("alpha", alpha)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_generalized_negative_binomialDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a discrete uniform distribution.
 *
 *        Samples are uniformly distributed over the half-open interval *[low, high)*
 *        (includes *low*, but excludes *high*).
 *
 *        Example::
 *
 *        randint(low=0, high=5, shape=(2,2)) = [[ 0,  2],
 *        [ 3,  1]]
 *
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L193
 * \param low Lower bound of the distribution.
 * \param high Upper bound of the distribution.
 * \param shape Shape of the output.
 * \param ctx Context of output, in format [cpu|gpu|cpu_pinned](n). Only used for
 * \param dtype DType of the output in case this can't be inferred. Defaults to int32 if
 * \return new symbol
 */
inline Symbol _random_randint(int64_t low,
                              int64_t high,
                              Shape shape = Shape(),
                              const std::string& ctx = "",
                              _random_randintDtype dtype = _random_randintDtype::kNone) {
  static const char *_random_randintDtypeValues[] = {
    "None",
    "int32",
    "int64"
  };
  return Operator("_random_randint")
           .SetParam("low", low)
           .SetParam("high", high)
           .SetParam("shape", shape)
           .SetParam("dtype", _random_randintDtypeValues[int(dtype)])
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a uniform distribution according to the input array
 *
 *        Samples are uniformly distributed over the half-open interval *[low, high)*
 *        (includes *low*, but excludes *high*).
 *
 *        Example::
 *
 *        uniform(low=0, high=1, data=ones(2,2)) = [[ 0.60276335,  0.85794562],
 *        [ 0.54488319,  0.84725171]]
 *
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L208
 * \param data The input
 * \param low Lower bound of the distribution.
 * \param high Upper bound of the distribution.
 * \return new symbol
 */
inline Symbol _random_uniform_like(Symbol data,
                                   mx_float low = 0,
                                   mx_float high = 1) {
  return Operator("_random_uniform_like")
           .SetParam("low", low)
           .SetParam("high", high)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a normal (Gaussian) distribution according to the
 *
 *        Samples are distributed according to a normal distribution parametrized by
 *        (standard deviation).
 *
 *        Example::
 *
 *        normal(loc=0, scale=1, data=ones(2,2)) = [[ 1.89171135, -1.16881478],
 *        [-1.23474145,  1.55807114]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L220
 * \param data The input
 * \param loc Mean of the distribution.
 * \param scale Standard deviation of the distribution.
 * \return new symbol
 */
inline Symbol _random_normal_like(Symbol data,
                                  mx_float loc = 0,
                                  mx_float scale = 1) {
  return Operator("_random_normal_like")
           .SetParam("loc", loc)
           .SetParam("scale", scale)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a gamma distribution according to the input array
 *
 *        Samples are distributed according to a gamma distribution parametrized by
 *
 *        Example::
 *
 *        gamma(alpha=9, beta=0.5, data=ones(2,2)) = [[ 7.10486984,  3.37695289],
 *        [ 3.91697288,  3.65933681]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L231
 * \param data The input
 * \param alpha Alpha parameter (shape) of the gamma distribution.
 * \param beta Beta parameter (scale) of the gamma distribution.
 * \return new symbol
 */
inline Symbol _random_gamma_like(Symbol data,
                                 mx_float alpha = 1,
                                 mx_float beta = 1) {
  return Operator("_random_gamma_like")
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from an exponential distribution according to the input
 *
 *        Samples are distributed according to an exponential distribution parametrized
 *
 *        Example::
 *
 *        exponential(lam=4, data=ones(2,2)) = [[ 0.0097189 ,  0.08999364],
 *        [ 0.04146638,  0.31715935]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L242
 * \param data The input
 * \param lam Lambda parameter (rate) of the exponential distribution.
 * \return new symbol
 */
inline Symbol _random_exponential_like(Symbol data,
                                       mx_float lam = 1) {
  return Operator("_random_exponential_like")
           .SetParam("lam", lam)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a Poisson distribution according to the input array
 *
 *        Samples are distributed according to a Poisson distribution parametrized by
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        poisson(lam=4, data=ones(2,2)) = [[ 5.,  2.],
 *        [ 4.,  6.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L254
 * \param data The input
 * \param lam Lambda parameter (rate) of the Poisson distribution.
 * \return new symbol
 */
inline Symbol _random_poisson_like(Symbol data,
                                   mx_float lam = 1) {
  return Operator("_random_poisson_like")
           .SetParam("lam", lam)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a negative binomial distribution according to the
 *
 *        Samples are distributed according to a negative binomial distribution
 *        *k* (limit of unsuccessful experiments) and *p* (failure probability in each
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        negative_binomial(k=3, p=0.4, data=ones(2,2)) = [[ 4.,  7.],
 *        [ 2.,  5.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L267
 * \param data The input
 * \param k Limit of unsuccessful experiments.
 * \param p Failure probability in each experiment.
 * \return new symbol
 */
inline Symbol _random_negative_binomial_like(Symbol data,
                                             int k = 1,
                                             mx_float p = 1) {
  return Operator("_random_negative_binomial_like")
           .SetParam("k", k)
           .SetParam("p", p)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Draw random samples from a generalized negative binomial distribution according
 *        input array shape.
 *
 *        Samples are distributed according to a generalized negative binomial
 *        *mu* (mean) and *alpha* (dispersion). *alpha* is defined as *1/k* where *k* is
 *        number of unsuccessful experiments (generalized to real numbers).
 *        Samples will always be returned as a floating point data type.
 *
 *        Example::
 *
 *        generalized_negative_binomial(mu=2.0, alpha=0.3, data=ones(2,2)) = [[ 2.,  1.],
 *        [ 6.,  4.]]
 *
 *
 *        Defined in src/operator/random/sample_op.cc:L283
 * \param data The input
 * \param mu Mean of the negative binomial distribution.
 * \param alpha Alpha (dispersion) parameter of the negative binomial distribution.
 * \return new symbol
 */
inline Symbol _random_generalized_negative_binomial_like(Symbol data,
                                                         mx_float mu = 1,
                                                         mx_float alpha = 1) {
  return Operator("_random_generalized_negative_binomial_like")
           .SetParam("mu", mu)
           .SetParam("alpha", alpha)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Randomly shuffle the elements.
 *
 *        This shuffles the array along the first axis.
 *        The order of the elements in each subarray does not change.
 *        For example, if a 2D array is given, the order of the rows randomly changes,
 *        but the order of the elements in each row does not change.
 *
 * \param data Data to be shuffled.
 * \return new symbol
 */
inline Symbol _shuffle(Symbol data) {
  return Operator("_shuffle")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief elemwise_add operator for input dataA and input dataB data type of int8,
 *        and accumulates in type int32 for the output. For each argument, two more
 *        float32 must be provided representing the thresholds of quantizing argument
 *        type float32 to int8. The final outputs contain result in int32, and min
 *        and max thresholds representing the threholds for quantizing the float32 output
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *
 * \param lhs first input
 * \param rhs second input
 * \param lhs_min 3rd input
 * \param lhs_max 4th input
 * \param rhs_min 5th input
 * \param rhs_max 6th input
 * \return new symbol
 */
inline Symbol _contrib_quantized_elemwise_add(Symbol lhs,
                                              Symbol rhs,
                                              Symbol lhs_min,
                                              Symbol lhs_max,
                                              Symbol rhs_min,
                                              Symbol rhs_max) {
  return Operator("_contrib_quantized_elemwise_add")
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .SetInput("lhs_min", lhs_min)
           .SetInput("lhs_max", lhs_max)
           .SetInput("rhs_min", rhs_min)
           .SetInput("rhs_max", rhs_max)
           .CreateSymbol();
}

/*!
 * \brief Dequantize the input tensor into a float tensor.
 *        min_range and max_range are scalar floats that specify the range for
 *        the output data.
 *
 *        When input data type is `uint8`, the output is calculated using the following
 *
 *        `out[i] = in[i] * (max_range - min_range) / 255.0`,
 *
 *        When input data type is `int8`, the output is calculate using the following
 *        by keep zero centered for the quantized value:
 *
 *        `out[i] = in[i] * MaxAbs(min_range, max_range) / 127.0`,
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *
 *        Defined in src/operator/quantization/dequantize.cc:L83
 * \param data A ndarray/symbol of type `uint8`
 * \param min_range The minimum scalar value possibly produced for the input in float32
 * \param max_range The maximum scalar value possibly produced for the input in float32
 * \param out_type Output data type.
 * \return new symbol
 */
inline Symbol _contrib_dequantize(Symbol data,
                                  Symbol min_range,
                                  Symbol max_range,
                                  _contrib_dequantizeOutType out_type = _contrib_dequantizeOutType::kFloat32) {
  static const char *_contrib_dequantizeOutTypeValues[] = {
    "float32"
  };
  return Operator("_contrib_dequantize")
           .SetParam("out_type", _contrib_dequantizeOutTypeValues[int(out_type)])
           .SetInput("data", data)
           .SetInput("min_range", min_range)
           .SetInput("max_range", max_range)
           .CreateSymbol();
}

/*!
 * \brief Convolution operator for input, weight and bias data type of int8,
 *        and accumulates in type int32 for the output. For each argument, two more
 *        float32 must be provided representing the thresholds of quantizing argument
 *        type float32 to int8. The final outputs contain the convolution result in
 *        and max thresholds representing the threholds for quantizing the float32 output
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantized_conv.cc:L137
 * \param data Input data.
 * \param weight weight.
 * \param bias bias.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param min_weight Minimum value of weight.
 * \param max_weight Maximum value of weight.
 * \param min_bias Minimum value of bias.
 * \param max_bias Maximum value of bias.
 * \param kernel Convolution kernel size: (w,), (h, w) or (d, h, w)
 * \param num_filter Convolution filter(channel) number
 * \param stride Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param dilate Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param pad Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
 * \param num_group Number of group partitions.
 * \param workspace Maximum temporary workspace allowed (MB) in convolution.This
 *        parameter has two usages. When CUDNN is not used, it determines the effective
 *        batch size of the convolution kernel. When CUDNN is used, it controls the
 *        maximum temporary storage used for tuning the best CUDNN kernel when
 * \param no_bias Whether to disable bias parameter.
 * \param cudnn_tune Whether to pick convolution algo by running performance test.
 * \param cudnn_off Turn off cudnn for this layer.
 * \param layout Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.NHWC and NDHWC are
 * \return new symbol
 */
inline Symbol _contrib_quantized_conv(Symbol data,
                                      Symbol weight,
                                      Symbol bias,
                                      Symbol min_data,
                                      Symbol max_data,
                                      Symbol min_weight,
                                      Symbol max_weight,
                                      Symbol min_bias,
                                      Symbol max_bias,
                                      Shape kernel,
                                      uint32_t num_filter,
                                      Shape stride = {},
                                      Shape dilate = {},
                                      Shape pad = {},
                                      uint32_t num_group = 1,
                                      uint64_t workspace = 1024,
                                      bool no_bias = false,
                                      _contrib_quantized_convCudnnTune cudnn_tune = _contrib_quantized_convCudnnTune::kNone,
                                      bool cudnn_off = false,
                                      _contrib_quantized_convLayout layout = _contrib_quantized_convLayout::kNone) {
  static const char *_contrib_quantized_convCudnnTuneValues[] = {
    "None",
    "fastest",
    "limited_workspace",
    "off"
  };
  static const char *_contrib_quantized_convLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC"
  };
  return Operator("_contrib_quantized_conv")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("num_group", num_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("cudnn_tune", _contrib_quantized_convCudnnTuneValues[int(cudnn_tune)])
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("layout", _contrib_quantized_convLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .SetInput("min_weight", min_weight)
           .SetInput("max_weight", max_weight)
           .SetInput("min_bias", min_bias)
           .SetInput("max_bias", max_bias)
           .CreateSymbol();
}

/*!
 * \brief Compute *N*-D convolution on *(N+2)*-D input.
 *
 *        In the 2-D convolution, given input data with shape *(batch_size,
 *        channel, height, width)*, the output is computed by
 *
 *        .. math::
 *
 *        out[n,i,:,:] = bias[i] + \sum_{j=0}^{channel} data[n,j,:,:] \star
 *        weight[i,j,:,:]
 *
 *        where :math:`\star` is the 2-D cross-correlation operator.
 *
 *        For general 2-D convolution, the shapes are
 *
 *        - **data**: *(batch_size, channel, height, width)*
 *        - **weight**: *(num_filter, channel, kernel[0], kernel[1])*
 *        - **bias**: *(num_filter,)*
 *        - **out**: *(batch_size, num_filter, out_height, out_width)*.
 *
 *        Define::
 *
 *        f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1
 *
 *        then we have::
 *
 *        out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
 *        out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])
 *
 *        If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
 *
 *        The default data ``layout`` is *NCHW*, namely *(batch_size, channel, height,
 *        width)*. We can choose other layouts such as *NWC*.
 *
 *        If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
 *        evenly into *g* parts along the channel axis, and also evenly split ``weight``
 *        along the first dimension. Next compute the convolution on the *i*-th part of
 *        the data with the *i*-th weight part. The output is obtained by concatenating
 *        the *g* results.
 *
 *        1-D convolution does not have *height* dimension but only *width* in space.
 *
 *        - **data**: *(batch_size, channel, width)*
 *        - **weight**: *(num_filter, channel, kernel[0])*
 *        - **bias**: *(num_filter,)*
 *        - **out**: *(batch_size, num_filter, out_width)*.
 *
 *        3-D convolution adds an additional *depth* dimension besides *height* and
 *        *width*. The shapes are
 *
 *        - **data**: *(batch_size, channel, depth, height, width)*
 *        - **weight**: *(num_filter, channel, kernel[0], kernel[1], kernel[2])*
 *        - **bias**: *(num_filter,)*
 *        - **out**: *(batch_size, num_filter, out_depth, out_height, out_width)*.
 *
 *        Both ``weight`` and ``bias`` are learnable parameters.
 *
 *        There are other options to tune the performance.
 *
 *        - **cudnn_tune**: enable this option leads to higher startup time but may give
 *        faster speed. Options are
 *
 *        - **off**: no tuning
 *        - **limited_workspace**:run test and pick the fastest algorithm that doesn't
 *        exceed workspace limit.
 *        - **fastest**: pick the fastest algorithm and ignore workspace limit.
 *        - **None** (default): the behavior is determined by environment variable
 *        ``MXNET_CUDNN_AUTOTUNE_DEFAULT``. 0 for off, 1 for limited workspace
 *        (default), 2 for fastest.
 *
 *        - **workspace**: A large number leads to more (GPU) memory usage but may improve
 *        the performance.
 *
 *
 *
 *        Defined in src/operator/nn/convolution.cc:L472
 * \param data Input data to the ConvolutionOp.
 * \param weight Weight matrix.
 * \param bias Bias parameter.
 * \param kernel Convolution kernel size: (w,), (h, w) or (d, h, w)
 * \param num_filter Convolution filter(channel) number
 * \param stride Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param dilate Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each
 * \param pad Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
 * \param num_group Number of group partitions.
 * \param workspace Maximum temporary workspace allowed (MB) in convolution.This
 *        parameter has two usages. When CUDNN is not used, it determines the effective
 *        batch size of the convolution kernel. When CUDNN is used, it controls the
 *        maximum temporary storage used for tuning the best CUDNN kernel when
 * \param no_bias Whether to disable bias parameter.
 * \param cudnn_tune Whether to pick convolution algo by running performance test.
 * \param cudnn_off Turn off cudnn for this layer.
 * \param layout Set layout for input, output and weight. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.NHWC and NDHWC are
 * \return new symbol
 */
inline Symbol Convolution(Symbol data,
                          Symbol weight,
                          Symbol bias,
                          Shape kernel,
                          uint32_t num_filter,
                          Shape stride = {},
                          Shape dilate = {},
                          Shape pad = {},
                          uint32_t num_group = 1,
                          uint64_t workspace = 1024,
                          bool no_bias = false,
                          ConvolutionCudnnTune cudnn_tune = ConvolutionCudnnTune::kNone,
                          bool cudnn_off = false,
                          ConvolutionLayout layout = ConvolutionLayout::kNone) {
  static const char *ConvolutionCudnnTuneValues[] = {
    "None",
    "fastest",
    "limited_workspace",
    "off"
  };
  static const char *ConvolutionLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC"
  };
  return Operator("Convolution")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("num_group", num_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("cudnn_tune", ConvolutionCudnnTuneValues[int(cudnn_tune)])
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("layout", ConvolutionLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data A ndarray/symbol of type `float32`
 * \param min_data The minimum scalar value possibly produced for the data
 * \param max_data The maximum scalar value possibly produced for the data
 * \return new symbol
 */
inline Symbol _contrib_quantized_flatten(Symbol data,
                                         Symbol min_data,
                                         Symbol max_data) {
  return Operator("_contrib_quantized_flatten")
           .SetInput("data", data)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .CreateSymbol();
}

/*!
 * \brief Fully Connected operator for input, weight and bias data type of int8,
 *        and accumulates in type int32 for the output. For each argument, two more
 *        float32 must be provided representing the thresholds of quantizing argument
 *        type float32 to int8. The final outputs contain the convolution result in
 *        and max thresholds representing the threholds for quantizing the float32 output
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantized_fully_connected.cc:L313
 * \param data Input data.
 * \param weight weight.
 * \param bias bias.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param min_weight Minimum value of weight.
 * \param max_weight Maximum value of weight.
 * \param min_bias Minimum value of bias.
 * \param max_bias Maximum value of bias.
 * \param num_hidden Number of hidden nodes of the output.
 * \param no_bias Whether to disable bias parameter.
 * \param flatten Whether to collapse all but the first axis of the input data tensor.
 * \return new symbol
 */
inline Symbol _contrib_quantized_fully_connected(Symbol data,
                                                 Symbol weight,
                                                 Symbol bias,
                                                 Symbol min_data,
                                                 Symbol max_data,
                                                 Symbol min_weight,
                                                 Symbol max_weight,
                                                 Symbol min_bias,
                                                 Symbol max_bias,
                                                 int num_hidden,
                                                 bool no_bias = false,
                                                 bool flatten = true) {
  return Operator("_contrib_quantized_fully_connected")
           .SetParam("num_hidden", num_hidden)
           .SetParam("no_bias", no_bias)
           .SetParam("flatten", flatten)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .SetInput("min_weight", min_weight)
           .SetInput("max_weight", max_weight)
           .SetInput("min_bias", min_bias)
           .SetInput("max_bias", max_bias)
           .CreateSymbol();
}

/*!
 * \brief Applies a linear transformation: :math:`Y = XW^T + b`.
 *
 *        If ``flatten`` is set to be true, then the shapes are:
 *
 *        - **data**: `(batch_size, x1, x2, ..., xn)`
 *        - **weight**: `(num_hidden, x1 * x2 * ... * xn)`
 *        - **bias**: `(num_hidden,)`
 *        - **out**: `(batch_size, num_hidden)`
 *
 *        If ``flatten`` is set to be false, then the shapes are:
 *
 *        - **data**: `(x1, x2, ..., xn, input_dim)`
 *        - **weight**: `(num_hidden, input_dim)`
 *        - **bias**: `(num_hidden,)`
 *        - **out**: `(x1, x2, ..., xn, num_hidden)`
 *
 *        The learnable parameters include both ``weight`` and ``bias``.
 *
 *        If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
 *
 *        .. Note::
 *
 *        The sparse support for FullyConnected is limited to forward evaluation with
 *        weight and bias, where the length of `weight.indices` and `bias.indices` must
 *        to `num_hidden`. This could be useful for model inference with `row_sparse`
 *        trained with importance sampling or noise contrastive estimation.
 *
 *        To compute linear transformation with 'csr' sparse data, sparse.dot is
 *        of sparse.FullyConnected.
 *
 *
 *
 *        Defined in src/operator/nn/fully_connected.cc:L277
 * \param data Input data.
 * \param weight Weight matrix.
 * \param bias Bias parameter.
 * \param num_hidden Number of hidden nodes of the output.
 * \param no_bias Whether to disable bias parameter.
 * \param flatten Whether to collapse all but the first axis of the input data tensor.
 * \return new symbol
 */
inline Symbol FullyConnected(Symbol data,
                             Symbol weight,
                             Symbol bias,
                             int num_hidden,
                             bool no_bias = false,
                             bool flatten = true) {
  return Operator("FullyConnected")
           .SetParam("num_hidden", num_hidden)
           .SetParam("no_bias", no_bias)
           .SetParam("flatten", flatten)
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol();
}

/*!
 * \brief Pooling operator for input and output data type of int8.
 *        The input and output data comes with min and max thresholds for quantizing
 *        the float32 data into int8.
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *        This operator only supports `pool_type` of `avg` or `max`.
 *
 *        Defined in src/operator/quantization/quantized_pooling.cc:L145
 * \param data Input data.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param kernel Pooling kernel size: (y, x) or (d, y, x)
 * \param pool_type Pooling type to be applied.
 * \param global_pool Ignore kernel size, do global pooling based on current input
 * \param cudnn_off Turn off cudnn pooling and use MXNet pooling operator.
 * \param pooling_convention Pooling convention to be applied.
 * \param stride Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each
 * \param pad Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.
 * \param p_value Value of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
 * \param count_include_pad Only used for AvgPool, specify whether to count padding
 *        elements for averagecalculation. For example, with a 5*5 kernel on a 3*3 corner
 *        of a image,the sum of the 9 valid elements will be divided by 25 if this is set
 * \param layout Set layout for input and output. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 * \return new symbol
 */
inline Symbol _contrib_quantized_pooling(Symbol data,
                                         Symbol min_data,
                                         Symbol max_data,
                                         Shape kernel = {},
                                         _contrib_quantized_poolingPoolType pool_type = _contrib_quantized_poolingPoolType::kMax,
                                         bool global_pool = false,
                                         bool cudnn_off = false,
                                         _contrib_quantized_poolingPoolingConvention pooling_convention = _contrib_quantized_poolingPoolingConvention::kValid,
                                         Shape stride = {},
                                         Shape pad = {},
                                         dmlc::optional<int> p_value = dmlc::optional<int>(),
                                         dmlc::optional<bool> count_include_pad = dmlc::optional<bool>(),
                                         _contrib_quantized_poolingLayout layout = _contrib_quantized_poolingLayout::kNone) {
  static const char *_contrib_quantized_poolingPoolTypeValues[] = {
    "avg",
    "lp",
    "max",
    "sum"
  };
  static const char *_contrib_quantized_poolingPoolingConventionValues[] = {
    "full",
    "same",
    "valid"
  };
  static const char *_contrib_quantized_poolingLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC",
    "NWC"
  };
  return Operator("_contrib_quantized_pooling")
           .SetParam("kernel", kernel)
           .SetParam("pool_type", _contrib_quantized_poolingPoolTypeValues[int(pool_type)])
           .SetParam("global_pool", global_pool)
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("pooling_convention", _contrib_quantized_poolingPoolingConventionValues[int(pooling_convention)])
           .SetParam("stride", stride)
           .SetParam("pad", pad)
           .SetParam("p_value", p_value)
           .SetParam("count_include_pad", count_include_pad)
           .SetParam("layout", _contrib_quantized_poolingLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .CreateSymbol();
}

/*!
 * \brief Performs pooling on the input.
 *
 *        The shapes for 1-D pooling are
 *
 *        - **data** and **out**: *(batch_size, channel, width)* (NCW layout) or
 *        *(batch_size, width, channel)* (NWC layout),
 *
 *        The shapes for 2-D pooling are
 *
 *        - **data** and **out**: *(batch_size, channel, height, width)* (NCHW layout) or
 *        *(batch_size, height, width, channel)* (NHWC layout),
 *
 *        out_height = f(height, kernel[0], pad[0], stride[0])
 *        out_width = f(width, kernel[1], pad[1], stride[1])
 *
 *        The definition of *f* depends on ``pooling_convention``, which has two options:
 *
 *        - **valid** (default)::
 *
 *        f(x, k, p, s) = floor((x+2*p-k)/s)+1
 *
 *        - **full**, which is compatible with Caffe::
 *
 *        f(x, k, p, s) = ceil((x+2*p-k)/s)+1
 *
 *        When ``global_pool`` is set to be true, then global pooling is performed. It
 *        ``kernel=(height, width)`` and set the appropiate padding to 0.
 *
 *        Three pooling options are supported by ``pool_type``:
 *
 *        - **avg**: average pooling
 *        - **max**: max pooling
 *        - **sum**: sum pooling
 *        - **lp**: Lp pooling
 *
 *        For 3-D pooling, an additional *depth* dimension is added before
 *        *height*. Namely the input data and output will have shape *(batch_size,
 *        height, width)* (NCDHW layout) or *(batch_size, depth, height, width, channel)*
 *
 *        Notes on Lp pooling:
 *
 *        Lp pooling was first introduced by this paper:
 *        L-1 pooling is simply sum pooling, while L-inf pooling is simply max pooling.
 *        We can see that Lp pooling stands between those two, in practice the most
 *
 *        For each window ``X``, the mathematical expression for Lp pooling is:
 *
 *        :math:`f(X) = \sqrt[p]{\sum_{x}^{X} x^p}`
 *
 *
 *
 *        Defined in src/operator/nn/pooling.cc:L416
 * \param data Input data to the pooling operator.
 * \param kernel Pooling kernel size: (y, x) or (d, y, x)
 * \param pool_type Pooling type to be applied.
 * \param global_pool Ignore kernel size, do global pooling based on current input
 * \param cudnn_off Turn off cudnn pooling and use MXNet pooling operator.
 * \param pooling_convention Pooling convention to be applied.
 * \param stride Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each
 * \param pad Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.
 * \param p_value Value of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
 * \param count_include_pad Only used for AvgPool, specify whether to count padding
 *        elements for averagecalculation. For example, with a 5*5 kernel on a 3*3 corner
 *        of a image,the sum of the 9 valid elements will be divided by 25 if this is set
 * \param layout Set layout for input and output. Empty for
 *        default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d.
 * \return new symbol
 */
inline Symbol Pooling(Symbol data,
                      Shape kernel = {},
                      PoolingPoolType pool_type = PoolingPoolType::kMax,
                      bool global_pool = false,
                      bool cudnn_off = false,
                      PoolingPoolingConvention pooling_convention = PoolingPoolingConvention::kValid,
                      Shape stride = {},
                      Shape pad = {},
                      dmlc::optional<int> p_value = dmlc::optional<int>(),
                      dmlc::optional<bool> count_include_pad = dmlc::optional<bool>(),
                      PoolingLayout layout = PoolingLayout::kNone) {
  static const char *PoolingPoolTypeValues[] = {
    "avg",
    "lp",
    "max",
    "sum"
  };
  static const char *PoolingPoolingConventionValues[] = {
    "full",
    "same",
    "valid"
  };
  static const char *PoolingLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC",
    "NWC"
  };
  return Operator("Pooling")
           .SetParam("kernel", kernel)
           .SetParam("pool_type", PoolingPoolTypeValues[int(pool_type)])
           .SetParam("global_pool", global_pool)
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("pooling_convention", PoolingPoolingConventionValues[int(pooling_convention)])
           .SetParam("stride", stride)
           .SetParam("pad", pad)
           .SetParam("p_value", p_value)
           .SetParam("count_include_pad", count_include_pad)
           .SetParam("layout", PoolingLayoutValues[int(layout)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Quantize a input tensor from float to `out_type`,
 *        with user-specified `min_calib_range` and `max_calib_range` or the input range
 *
 *        Output `min_range` and `max_range` are scalar floats that specify the range for
 *
 *        When out_type is `uint8`, the output is calculated using the following equation:
 *
 *        `out[i] = (in[i] - min_range) * range(OUTPUT_TYPE) / (max_range - min_range) +
 *
 *        where `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`.
 *
 *        When out_type is `int8`, the output is calculate using the following equation
 *        by keep zero centered for the quantized value:
 *
 *        `out[i] = sign(in[i]) * min(abs(in[i] * scale + 0.5f, quantized_range)`,
 *
 *        where
 *        `quantized_range = MinAbs(max(int8), min(int8))` and
 *        `scale = quantized_range / MaxAbs(min_range, max_range).`
 *
 *        When out_type is `auto`, the output type is automatically determined by
 *        If min_calib_range < 0.0f, the output type will be int8, otherwise will be
 *        If min_calib_range isn't presented, the output type will be int8.
 *
 *        .. Note::
 *        This operator only supports forward propagation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantize_v2.cc:L92
 * \param data A ndarray/symbol of type `float32`
 * \param out_type Output data type. `auto` can be specified to automatically determine
 * \param min_calib_range The minimum scalar value in the form of float32. If present, it
 * \param max_calib_range The maximum scalar value in the form of float32. If present, it
 * \return new symbol
 */
inline Symbol _contrib_quantize_v2(Symbol data,
                                   _contrib_quantize_v2OutType out_type = _contrib_quantize_v2OutType::kInt8,
                                   mx_float min_calib_range = mx_float(),
                                   mx_float max_calib_range = mx_float()) {
  static const char *_contrib_quantize_v2OutTypeValues[] = {
    "auto",
    "int8",
    "uint8"
  };
  return Operator("_contrib_quantize_v2")
           .SetParam("out_type", _contrib_quantize_v2OutTypeValues[int(out_type)])
           .SetParam("min_calib_range", min_calib_range)
           .SetParam("max_calib_range", max_calib_range)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Joins input arrays along a given axis.
 *
 *        The dimensions of the input arrays should be the same except the axis along
 *        which they will be concatenated.
 *        The dimension of the output array along the concatenated axis will be equal
 *        to the sum of the corresponding dimensions of the input arrays.
 *        All inputs with different min/max will be rescaled by using largest [min, max]
 *        If any input holds int8, then the output will be int8. Otherwise output will be
 *
 *
 *
 *        Defined in src/operator/quantization/quantized_concat.cc:L108
 * \param data List of arrays to concatenate
 * \param num_args Number of inputs to be concated.
 * \param dim the dimension to be concated.
 * \return new symbol
 */
inline Symbol _contrib_quantized_concat(const std::vector<Symbol>& data,
                                        int num_args,
                                        int dim = 1) {
  return Operator("_contrib_quantized_concat")
           .SetParam("num_args", num_args)
           .SetParam("dim", dim)
(data)
           .CreateSymbol();
}

/*!
 * \brief Joins input arrays along a given axis.
 *
 *        .. note:: `Concat` is deprecated. Use `concat` instead.
 *
 *        The dimensions of the input arrays should be the same except the axis along
 *        which they will be concatenated.
 *        The dimension of the output array along the concatenated axis will be equal
 *        to the sum of the corresponding dimensions of the input arrays.
 *
 *        The storage type of ``concat`` output depends on storage types of inputs
 *
 *        - concat(csr, csr, ..., csr, dim=0) = csr
 *        - otherwise, ``concat`` generates output with default storage
 *
 *        Example::
 *
 *        x = [[1,1],[2,2]]
 *        y = [[3,3],[4,4],[5,5]]
 *        z = [[6,6], [7,7],[8,8]]
 *
 *        concat(x,y,z,dim=0) = [[ 1.,  1.],
 *        [ 2.,  2.],
 *        [ 3.,  3.],
 *        [ 4.,  4.],
 *        [ 5.,  5.],
 *        [ 6.,  6.],
 *        [ 7.,  7.],
 *        [ 8.,  8.]]
 *
 *        Note that you cannot concat x,y,z along dimension 1 since dimension
 *        0 is not the same for all the input arrays.
 *
 *        concat(y,z,dim=1) = [[ 3.,  3.,  6.,  6.],
 *        [ 4.,  4.,  7.,  7.],
 *        [ 5.,  5.,  8.,  8.]]
 *
 *
 *
 *        Defined in src/operator/nn/concat.cc:L371
 * \param data List of arrays to concatenate
 * \param num_args Number of inputs to be concated.
 * \param dim the dimension to be concated.
 * \return new symbol
 */
inline Symbol Concat(const std::vector<Symbol>& data,
                     int num_args,
                     int dim = 1) {
  return Operator("Concat")
           .SetParam("num_args", num_args)
           .SetParam("dim", dim)
(data)
           .CreateSymbol();
}

/*!
 * \brief Given data that is quantized in int32 and the corresponding thresholds,
 *        requantize the data into int8 using min and max thresholds either calculated at
 *        or from calibration. It's highly recommended to pre-calucate the min and max
 *        through calibration since it is able to save the runtime of the operator and
 *        inference accuracy.
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/requantize.cc:L60
 * \param data A ndarray/symbol of type `int32`
 * \param min_range The original minimum scalar value in the form of float32 used for
 * \param max_range The original maximum scalar value in the form of float32 used for
 * \param out_type Output data type. `auto` can be specified to automatically determine
 * \param min_calib_range The minimum scalar value in the form of float32 obtained
 *        through calibration. If present, it will be used to requantize the int32 data
 * \param max_calib_range The maximum scalar value in the form of float32 obtained
 *        through calibration. If present, it will be used to requantize the int32 data
 * \return new symbol
 */
inline Symbol _contrib_requantize(Symbol data,
                                  Symbol min_range,
                                  Symbol max_range,
                                  _contrib_requantizeOutType out_type = _contrib_requantizeOutType::kInt8,
                                  mx_float min_calib_range = mx_float(),
                                  mx_float max_calib_range = mx_float()) {
  static const char *_contrib_requantizeOutTypeValues[] = {
    "auto",
    "int8",
    "uint8"
  };
  return Operator("_contrib_requantize")
           .SetParam("out_type", _contrib_requantizeOutTypeValues[int(out_type)])
           .SetParam("min_calib_range", min_calib_range)
           .SetParam("max_calib_range", max_calib_range)
           .SetInput("data", data)
           .SetInput("min_range", min_range)
           .SetInput("max_range", max_range)
           .CreateSymbol();
}

/*!
 * \brief Activation operator for input and output data type of int8.
 *        The input and output data comes with min and max thresholds for quantizing
 *        the float32 data into int8.
 *
 *        .. Note::
 *        This operator only supports forward propogation. DO NOT use it in training.
 *        This operator only supports `relu`
 *
 *        Defined in src/operator/quantization/quantized_activation.cc:L91
 * \param data Input data.
 * \param min_data Minimum value of data.
 * \param max_data Maximum value of data.
 * \param act_type Activation function to be applied.
 * \return new symbol
 */
inline Symbol _contrib_quantized_act(Symbol data,
                                     Symbol min_data,
                                     Symbol max_data,
                                     _contrib_quantized_actActType act_type) {
  static const char *_contrib_quantized_actActTypeValues[] = {
    "relu",
    "sigmoid",
    "softrelu",
    "softsign",
    "tanh"
  };
  return Operator("_contrib_quantized_act")
           .SetParam("act_type", _contrib_quantized_actActTypeValues[int(act_type)])
           .SetInput("data", data)
           .SetInput("min_data", min_data)
           .SetInput("max_data", max_data)
           .CreateSymbol();
}

/*!
 * \brief Applies an activation function element-wise to the input.
 *
 *        The following activation functions are supported:
 *
 *        - `relu`: Rectified Linear Unit, :math:`y = max(x, 0)`
 *        - `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
 *        - `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) +
 *        - `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
 *        - `softsign`: :math:`y = \frac{x}{1 + abs(x)}`
 *
 *
 *
 *        Defined in src/operator/nn/activation.cc:L167
 * \param data The input array.
 * \param act_type Activation function to be applied.
 * \return new symbol
 */
inline Symbol Activation(Symbol data,
                         ActivationActType act_type) {
  static const char *ActivationActTypeValues[] = {
    "relu",
    "sigmoid",
    "softrelu",
    "softsign",
    "tanh"
  };
  return Operator("Activation")
           .SetParam("act_type", ActivationActTypeValues[int(act_type)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Quantize a input tensor from float to `out_type`,
 *        with user-specified `min_range` and `max_range`.
 *
 *        min_range and max_range are scalar floats that specify the range for
 *        the input data.
 *
 *        When out_type is `uint8`, the output is calculated using the following equation:
 *
 *        `out[i] = (in[i] - min_range) * range(OUTPUT_TYPE) / (max_range - min_range) +
 *
 *        where `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`.
 *
 *        When out_type is `int8`, the output is calculate using the following equation
 *        by keep zero centered for the quantized value:
 *
 *        `out[i] = sign(in[i]) * min(abs(in[i] * scale + 0.5f, quantized_range)`,
 *
 *        where
 *        `quantized_range = MinAbs(max(int8), min(int8))` and
 *        `scale = quantized_range / MaxAbs(min_range, max_range).`
 *
 *        .. Note::
 *        This operator only supports forward propagation. DO NOT use it in training.
 *
 *        Defined in src/operator/quantization/quantize.cc:L74
 * \param data A ndarray/symbol of type `float32`
 * \param min_range The minimum scalar value possibly produced for the input
 * \param max_range The maximum scalar value possibly produced for the input
 * \param out_type Output data type.
 * \return new symbol
 */
inline Symbol _contrib_quantize(Symbol data,
                                Symbol min_range,
                                Symbol max_range,
                                _contrib_quantizeOutType out_type = _contrib_quantizeOutType::kUint8) {
  static const char *_contrib_quantizeOutTypeValues[] = {
    "int8",
    "uint8"
  };
  return Operator("_contrib_quantize")
           .SetParam("out_type", _contrib_quantizeOutTypeValues[int(out_type)])
           .SetInput("data", data)
           .SetInput("min_range", min_range)
           .SetInput("max_range", max_range)
           .CreateSymbol();
}

/*!
 * \brief Apply a custom operator implemented in a frontend language (like Python).
 *
 *        Custom operators should override required methods like `forward` and `backward`.
 *        The custom operator must be registered before it can be used.
 *        Please check the tutorial here: http://mxnet.io/faq/new_op.html.
 *
 *
 *
 *        Defined in src/operator/custom/custom.cc:L546
 * \param data Input data for the custom operator.
 * \param op_type Name of the custom operator. This is the name that is passed to
 * \return new symbol
 */
inline Symbol Custom(const std::vector<Symbol>& data,
                     const std::string& op_type) {
  return Operator("Custom")
(data)
           .CreateSymbol();
}

/*!
 * \brief Batch normalization.
 *
 *        Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
 *        well as offset ``beta``.
 *        Standard BN [1]_ implementation only normalize the data within each device.
 *        SyncBN normalizes the input within the whole mini-batch.
 *        We follow the sync-onece implmentation described in the paper [2]_.
 *
 *        Assume the input has more than one dimension and we normalize along axis 1.
 *        We first compute the mean and variance along this axis:
 *
 *        .. math::
 *
 *        data\_mean[i] = mean(data[:,i,:,...]) \\
 *        data\_var[i] = var(data[:,i,:,...])
 *
 *        Then compute the normalized output, which has the same shape as input, as
 *
 *        .. math::
 *
 *        out[:,i,:,...] = \frac{data[:,i,:,...] -
 *
 *        Both *mean* and *var* returns a scalar by treating the input as a vector.
 *
 *        Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 *        have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
 *        ``data_var`` as well, which are needed for the backward pass.
 *
 *        Besides the inputs and the outputs, this operator accepts two auxiliary
 *        states, ``moving_mean`` and ``moving_var``, which are *k*-length
 *        vectors. They are global statistics for the whole dataset, which are updated
 *        by::
 *
 *        moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
 *        moving_var = moving_var * momentum + data_var * (1 - momentum)
 *
 *        If ``use_global_stats`` is set to be true, then ``moving_mean`` and
 *        ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
 *        the output. It is often used during inference.
 *
 *        Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
 *        then set ``gamma`` to 1 and its gradient to 0.
 *
 *        Reference:
 *        .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating
 *        deep network training by reducing internal covariate shift." *ICML 2015*
 *        .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, \
 *        Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation."
 *
 *
 *        Defined in src/operator/contrib/sync_batch_norm.cc:L97
 * \param data Input data to batch normalization
 * \param gamma gamma array
 * \param beta beta array
 * \param moving_mean running mean of input
 * \param moving_var running variance of input
 * \param key Hash key for synchronization, please set the same hash key for same layer,
 * \param eps Epsilon to prevent div 0
 * \param momentum Momentum for moving average
 * \param fix_gamma Fix gamma while training
 * \param use_global_stats Whether use global moving statistics instead of local
 * \param output_mean_var Output All,normal mean and var
 * \param ndev The count of GPU devices
 * \return new symbol
 */
inline Symbol _contrib_SyncBatchNorm(Symbol data,
                                     Symbol gamma,
                                     Symbol beta,
                                     Symbol moving_mean,
                                     Symbol moving_var,
                                     const std::string& key,
                                     mx_float eps = 0.00100000005,
                                     mx_float momentum = 0.899999976,
                                     bool fix_gamma = true,
                                     bool use_global_stats = false,
                                     bool output_mean_var = false,
                                     int ndev = 1) {
  return Operator("_contrib_SyncBatchNorm")
           .SetParam("eps", eps)
           .SetParam("momentum", momentum)
           .SetParam("fix_gamma", fix_gamma)
           .SetParam("use_global_stats", use_global_stats)
           .SetParam("output_mean_var", output_mean_var)
           .SetParam("ndev", ndev)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .SetInput("beta", beta)
           .SetInput("moving_mean", moving_mean)
           .SetInput("moving_var", moving_var)
           .CreateSymbol();
}

/*!
 * \brief This operator samples sub-graphs from a csr graph via an
 *        uniform probability. The operator is designed for DGL.
 *
 *        The operator outputs three sets of NDArrays to represent the sampled results
 *        (the number of NDArrays in each set is the same as the number of seed NDArrays):
 *        1) a set of 1D NDArrays containing the sampled vertices, 2) a set of
 *        the sampled edges, 3) a set of 1D NDArrays indicating the layer where a vertex
 *        The first set of 1D NDArrays have a length of max_num_vertices+1. The last
 *        indicate the acutal number of vertices in a subgraph. The third set of NDArrays
 *        of max_num_vertices, and the valid number of vertices is the same as the ones
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        shape = (5, 5)
 *        data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
 *        indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
 *        indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
 *        a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
 *        a.asnumpy()
 *        seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
 *        out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2,
 *
 *        out[0]
 *        [0 1 2 3 4 5]
 *        <NDArray 6 @cpu(0)>
 *
 *        out[1].asnumpy()
 *        array([[ 0,  1,  0,  3,  0],
 *        [ 5,  0,  0,  7,  0],
 *        [ 9,  0,  0, 11,  0],
 *        [13,  0, 15,  0,  0],
 *        [17,  0, 19,  0,  0]])
 *
 *        out[2]
 *        [0 0 0 0 0]
 *        <NDArray 5 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L784
 * \param csr_matrix csr matrix
 * \param seed_arrays seed vertices
 * \param num_args Number of input NDArray.
 * \param num_hops Number of hops.
 * \param num_neighbor Number of neighbor.
 * \param max_num_vertices Max number of vertices.
 * \return new symbol
 */
inline Symbol _contrib_dgl_csr_neighbor_uniform_sample(Symbol csr_matrix,
                                                       const std::vector<Symbol>& seed_arrays,
                                                       int num_args,
                                                       int64_t num_hops = 1,
                                                       int64_t num_neighbor = 2,
                                                       int64_t max_num_vertices = 100) {
  return Operator("_contrib_dgl_csr_neighbor_uniform_sample")
           .SetParam("num_args", num_args)
           .SetParam("num_hops", num_hops)
           .SetParam("num_neighbor", num_neighbor)
           .SetParam("max_num_vertices", max_num_vertices)
           .SetInput("csr_matrix", csr_matrix)
(seed_arrays)
           .CreateSymbol();
}

/*!
 * \brief This operator samples sub-graph from a csr graph via an
 *        non-uniform probability. The operator is designed for DGL.
 *
 *        The operator outputs four sets of NDArrays to represent the sampled results
 *        (the number of NDArrays in each set is the same as the number of seed NDArrays):
 *        1) a set of 1D NDArrays containing the sampled vertices, 2) a set of
 *        the sampled edges, 3) a set of 1D NDArrays with the probability that vertices
 *        4) a set of 1D NDArrays indicating the layer where a vertex is sampled.
 *        The first set of 1D NDArrays have a length of max_num_vertices+1. The last
 *        indicate the acutal number of vertices in a subgraph. The third and fourth set
 *        of max_num_vertices, and the valid number of vertices is the same as the ones
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        shape = (5, 5)
 *        prob = mx.nd.array([0.9, 0.8, 0.2, 0.4, 0.1], dtype=np.float32)
 *        data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
 *        indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
 *        indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
 *        a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
 *        seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
 *        out = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(a, prob, seed,
 *
 *        out[0]
 *        [0 1 2 3 4 5]
 *        <NDArray 6 @cpu(0)>
 *
 *        out[1].asnumpy()
 *        array([[ 0,  1,  2,  0,  0],
 *        [ 5,  0,  6,  0,  0],
 *        [ 9, 10,  0,  0,  0],
 *        [13, 14,  0,  0,  0],
 *        [ 0, 18, 19,  0,  0]])
 *
 *        out[2]
 *        [0.9 0.8 0.2 0.4 0.1]
 *        <NDArray 5 @cpu(0)>
 *
 *        out[3]
 *        [0 0 0 0 0]
 *        <NDArray 5 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L883
 * \param csr_matrix csr matrix
 * \param probability probability vector
 * \param seed_arrays seed vertices
 * \param num_args Number of input NDArray.
 * \param num_hops Number of hops.
 * \param num_neighbor Number of neighbor.
 * \param max_num_vertices Max number of vertices.
 * \return new symbol
 */
inline Symbol _contrib_dgl_csr_neighbor_non_uniform_sample(Symbol csr_matrix,
                                                           Symbol probability,
                                                           const std::vector<Symbol>& seed_arrays,
                                                           int num_args,
                                                           int64_t num_hops = 1,
                                                           int64_t num_neighbor = 2,
                                                           int64_t max_num_vertices = 100) {
  return Operator("_contrib_dgl_csr_neighbor_non_uniform_sample")
           .SetParam("num_args", num_args)
           .SetParam("num_hops", num_hops)
           .SetParam("num_neighbor", num_neighbor)
           .SetParam("max_num_vertices", max_num_vertices)
           .SetInput("csr_matrix", csr_matrix)
           .SetInput("probability", probability)
(seed_arrays)
           .CreateSymbol();
}

/*!
 * \brief This operator constructs an induced subgraph for
 *        a given set of vertices from a graph. The operator accepts multiple
 *        sets of vertices as input. For each set of vertices, it returns a pair
 *        of CSR matrices if return_mapping is True: the first matrix contains edges
 *        with new edge Ids, the second matrix contains edges with the original
 *        edge Ids.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        x=[[1, 0, 0, 2],
 *        [3, 0, 4, 0],
 *        [0, 5, 0, 0],
 *        [0, 6, 7, 0]]
 *        v = [0, 1, 2]
 *        dgl_subgraph(x, v, return_mapping=True) =
 *        [[1, 0, 0],
 *        [2, 0, 3],
 *        [0, 4, 0]],
 *        [[1, 0, 0],
 *        [3, 0, 4],
 *        [0, 5, 0]]
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1140
 * \param graph Input graph where we sample vertices.
 * \param data The input arrays that include data arrays and states.
 * \param num_args Number of input arguments, including all symbol inputs.
 * \param return_mapping Return mapping of vid and eid between the subgraph and the
 * \return new symbol
 */
inline Symbol _contrib_dgl_subgraph(Symbol graph,
                                    const std::vector<Symbol>& data,
                                    int num_args,
                                    bool return_mapping) {
  return Operator("_contrib_dgl_subgraph")
           .SetParam("num_args", num_args)
           .SetParam("return_mapping", return_mapping)
           .SetInput("graph", graph)
(data)
           .CreateSymbol();
}

/*!
 * \brief This operator implements the edge_id function for a graph
 *        stored in a CSR matrix (the value of the CSR stores the edge Id of the graph).
 *        output[i] = input[u[i], v[i]] if there is an edge between u[i] and v[i]],
 *        otherwise output[i] will be -1. Both u and v should be 1D vectors.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        x = [[ 1, 0, 0 ],
 *        [ 0, 2, 0 ],
 *        [ 0, 0, 3 ]]
 *        u = [ 0, 0, 1, 1, 2, 2 ]
 *        v = [ 0, 1, 1, 2, 0, 2 ]
 *        edge_id(x, u, v) = [ 1, -1, 2, -1, -1, 3 ]
 *
 *        The storage type of ``edge_id`` output depends on storage types of inputs
 *        - edge_id(csr, default, default) = default
 *        - default and rsp inputs are not supported
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1321
 * \param data Input ndarray
 * \param u u ndarray
 * \param v v ndarray
 * \return new symbol
 */
inline Symbol _contrib_edge_id(Symbol data,
                               Symbol u,
                               Symbol v) {
  return Operator("_contrib_edge_id")
           .SetInput("data", data)
           .SetInput("u", u)
           .SetInput("v", v)
           .CreateSymbol();
}

/*!
 * \brief This operator converts a CSR matrix whose values are edge Ids
 *        to an adjacency matrix whose values are ones. The output CSR matrix always has
 *        the data value of float32.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        x = [[ 1, 0, 0 ],
 *        [ 0, 2, 0 ],
 *        [ 0, 0, 3 ]]
 *        dgl_adjacency(x) =
 *        [[ 1, 0, 0 ],
 *        [ 0, 1, 0 ],
 *        [ 0, 0, 1 ]]
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1393
 * \param data Input ndarray
 * \return new symbol
 */
inline Symbol _contrib_dgl_adjacency(Symbol data) {
  return Operator("_contrib_dgl_adjacency")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief This operator compacts a CSR matrix generated by
 *        dgl_csr_neighbor_uniform_sample and dgl_csr_neighbor_non_uniform_sample.
 *        The CSR matrices generated by these two operators may have many empty
 *        rows at the end and many empty columns. This operator removes these
 *        empty rows and empty columns.
 *
 *        Example:
 *
 *        .. code:: python
 *
 *        shape = (5, 5)
 *        data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],
 *        indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
 *        indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
 *        a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
 *        seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
 *        out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2,
 *        num_neighbor=2, max_num_vertices=6)
 *        subg_v = out[0]
 *        subg = out[1]
 *        compact = mx.nd.contrib.dgl_graph_compact(subg, subg_v,
 *        graph_sizes=(subg_v[-1].asnumpy()[0]), return_mapping=False)
 *
 *        compact.asnumpy()
 *        array([[0, 0, 0, 1, 0],
 *        [2, 0, 3, 0, 0],
 *        [0, 4, 0, 0, 5],
 *        [0, 6, 0, 0, 7],
 *        [8, 9, 0, 0, 0]])
 *
 *
 *
 *        Defined in src/operator/contrib/dgl_graph.cc:L1582
 * \param graph_data Input graphs and input vertex Ids.
 * \param num_args Number of input arguments.
 * \param return_mapping Return mapping of vid and eid between the subgraph and the
 * \param graph_sizes the number of vertices in each graph.
 * \return new symbol
 */
inline Symbol _contrib_dgl_graph_compact(const std::vector<Symbol>& graph_data,
                                         int num_args,
                                         bool return_mapping,
                                         nnvm::Tuple<int64_t> graph_sizes) {
  return Operator("_contrib_dgl_graph_compact")
           .SetParam("num_args", num_args)
           .SetParam("return_mapping", return_mapping)
           .SetParam("graph_sizes", graph_sizes)
(graph_data)
           .CreateSymbol();
}

/*!
 * \brief Computes the Khatri-Rao product of the input matrices.
 *
 *        Given a collection of :math:`n` input matrices,
 *
 *        .. math::
 *        A_1 \in \mathbb{R}^{M_1 \times M}, \ldots, A_n \in \mathbb{R}^{M_n \times N},
 *
 *        the (column-wise) Khatri-Rao product is defined as the matrix,
 *
 *        .. math::
 *        X = A_1 \otimes \cdots \otimes A_n \in \mathbb{R}^{(M_1 \cdots M_n) \times N},
 *
 *        where the :math:`k` th column is equal to the column-wise outer product
 *        :math:`{A_1}_k \otimes \cdots \otimes {A_n}_k` where :math:`{A_i}_k` is the kth
 *        column of the ith matrix.
 *
 *        Example::
 *
 *        >>> A = mx.nd.array([[1, -1],
 *        >>>                  [2, -3]])
 *        >>> B = mx.nd.array([[1, 4],
 *        >>>                  [2, 5],
 *        >>>                  [3, 6]])
 *        >>> C = mx.nd.khatri_rao(A, B)
 *        >>> print(C.asnumpy())
 *        [[  1.  -4.]
 *        [  2.  -5.]
 *        [  3.  -6.]
 *        [  2. -12.]
 *        [  4. -15.]
 *        [  6. -18.]]
 *
 *
 *
 *        Defined in src/operator/contrib/krprod.cc:L108
 * \param args Positional input matrices
 * \return new symbol
 */
inline Symbol khatri_rao(const std::vector<Symbol>& args) {
  return Operator("khatri_rao")
(args)
           .CreateSymbol();
}

/*!
 * \brief Computes the log likelihood of a univariate Hawkes process.
 *
 *        The log likelihood is calculated on point process observations represented
 *        as *ragged* matrices for *lags* (interarrival times w.r.t. the previous point),
 *        and *marks* (identifiers for the process ID). Note that each mark is considered
 *        i.e., computes the joint likelihood of a set of Hawkes processes determined by
 *
 *        .. math::
 *
 *        \lambda_k^*(t) = \lambda_k + \alpha_k \sum_{\{t_i < t, y_i = k\}} \beta_k
 *
 *        where :math:`\lambda_k` specifies the background intensity ``lda``,
 *        :math:`\alpha_k` specifies the *branching ratio* or ``alpha``, and
 *
 *        ``lags`` and ``marks`` are two NDArrays of shape (N, T) and correspond to the
 *        representation of the point process observation, the first dimension
 *        corresponds to the batch index, and the second to the sequence. These are
 *        "left-aligned" *ragged* matrices (the first index of the second dimension is
 *        the beginning of every sequence. The length of each sequence is given by
 *        ``valid_length``, of shape (N,) where ``valid_length[i]`` corresponds to the
 *
 *        ``max_time`` is the length of the observation period of the point process. That
 *        is, specifying ``max_time[i] = 5`` computes the likelihood of the i-th sample
 *        as observed on the time interval :math:`(0, 5]`. Naturally, the sum of all
 *
 *        The input ``state`` specifies the *memory* of the Hawkes process. Invoking the
 *
 *        .. math::
 *
 *        s_k(t) = \sum_{t_i < t} \exp(-\beta_k (t - t_i)).
 *
 *        The ``state`` to be provided is :math:`s_k(0)` and carries the added intensity
 *        due to past events before the current batch. :math:`s_k(T)` is returned from
 *
 *        Example::
 *
 *        # define the Hawkes process parameters
 *        lda = nd.array([1.5, 2.0, 3.0]).tile((N, 1))
 *        alpha = nd.array([0.2, 0.3, 0.4])  # branching ratios should be < 1
 *        beta = nd.array([1.0, 2.0, 3.0])
 *
 *        # the "data", or observations
 *        ia_times = nd.array([[6, 7, 8, 9], [1, 2, 3, 4], [3, 4, 5, 6], [8, 9, 10, 11]])
 *        marks = nd.zeros((N, T)).astype(np.int32)
 *
 *        # starting "state" of the process
 *        states = nd.zeros((N, K))
 *
 *        valid_length = nd.array([1, 2, 3, 4])  # number of valid points in each sequence
 *        max_time = nd.ones((N,)) * 100.0  # length of the observation period
 *
 *        A = nd.contrib.hawkesll(
 *        lda, alpha, beta, states, ia_times, marks, valid_length, max_time
 *        )
 *
 *        References:
 *
 *        -  Bacry, E., Mastromatteo, I., & Muzy, J. F. (2015).
 *        Hawkes processes in finance. Market Microstructure and Liquidity
 *        , 1(01), 1550005.
 *
 *
 *        Defined in src/operator/contrib/hawkes_ll.cc:L84
 * \param lda Shape (N, K) The intensity for each of the K processes, for each sample
 * \param alpha Shape (K,) The infectivity factor (branching ratio) for each process
 * \param beta Shape (K,) The decay parameter for each process
 * \param state Shape (N, K) the Hawkes state for each process
 * \param lags Shape (N, T) the interarrival times
 * \param marks Shape (N, T) the marks (process ids)
 * \param valid_length The number of valid points in the process
 * \param max_time the length of the interval where the processes were sampled
 * \return new symbol
 */
inline Symbol _contrib_hawkesll(Symbol lda,
                                Symbol alpha,
                                Symbol beta,
                                Symbol state,
                                Symbol lags,
                                Symbol marks,
                                Symbol valid_length,
                                Symbol max_time) {
  return Operator("_contrib_hawkesll")
           .SetInput("lda", lda)
           .SetInput("alpha", alpha)
           .SetInput("beta", beta)
           .SetInput("state", state)
           .SetInput("lags", lags)
           .SetInput("marks", marks)
           .SetInput("valid_length", valid_length)
           .SetInput("max_time", max_time)
           .CreateSymbol();
}

/*!
 * \brief
 * \return new symbol
 */
inline Symbol _contrib_backward_hawkesll() {
  return Operator("_contrib_backward_hawkesll")
           .CreateSymbol();
}

/*!
 * \brief Number of stored values for a sparse tensor, including explicit zeros.
 *
 *        This operator only supports CSR matrix on CPU.
 *
 *
 *
 *        Defined in src/operator/contrib/nnz.cc:L177
 * \param data Input
 * \param axis Select between the number of values across the whole matrix, in each
 * \return new symbol
 */
inline Symbol _contrib_getnnz(Symbol data,
                              dmlc::optional<int> axis = dmlc::optional<int>()) {
  return Operator("_contrib_getnnz")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief This operator implements the gradient multiplier function.
 *        In forward pass it acts as an identity transform. During backpropagation it
 *        multiplies the gradient from the subsequent level by a scalar factor lambda and
 *        the preceding layer.
 *
 *
 *        Defined in src/operator/contrib/gradient_multiplier_op.cc:L78
 * \param data The input array.
 * \param scalar lambda multiplier
 * \return new symbol
 */
inline Symbol _contrib_gradientmultiplier(Symbol data,
                                          mx_float scalar) {
  return Operator("_contrib_gradientmultiplier")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \param data source input
 * \param scalar scalar input
 * \return new symbol
 */
inline Symbol _contrib_backward_gradientmultiplier(Symbol data,
                                                   mx_float scalar) {
  return Operator("_contrib_backward_gradientmultiplier")
           .SetParam("scalar", scalar)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Update function for multi-precision AdamW optimizer.
 *
 *        AdamW is seen as a modification of Adam by decoupling the weight decay from the
 *        optimization steps taken w.r.t. the loss function.
 *
 *        Adam update consists of the following steps, where g represents gradient and m,
 *        are 1st and 2nd order moment estimates (mean and variance).
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
 *        v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
 *        W_t = W_{t-1} - \eta_t (\alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon } + wd
 *
 *        It updates the weights using::
 *
 *        m = beta1*m + (1-beta1)*grad
 *        v = beta2*v + (1-beta2)*(grad**2)
 *        w -= eta * (learning_rate * m / (sqrt(v) + epsilon) + w * wd)
 *
 *        Note that gradient is rescaled to grad = rescale_grad * grad. If rescale_grad
 *        the update is skipped.
 *
 *
 *        Defined in src/operator/contrib/adamw.cc:L77
 * \param weight Weight
 * \param grad Gradient
 * \param mean Moving mean
 * \param var Moving variance
 * \param weight32 Weight32
 * \param rescale_grad Rescale gradient to rescale_grad * grad. If NaN, Inf, or 0, the
 * \param lr Learning rate
 * \param eta Learning rate schedule multiplier
 * \param beta1 The decay rate for the 1st moment estimates.
 * \param beta2 The decay rate for the 2nd moment estimates.
 * \param epsilon A small constant for numerical stability.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol _mp_adamw_update(Symbol weight,
                               Symbol grad,
                               Symbol mean,
                               Symbol var,
                               Symbol weight32,
                               Symbol rescale_grad,
                               mx_float lr,
                               mx_float eta,
                               mx_float beta1 = 0.899999976,
                               mx_float beta2 = 0.999000013,
                               mx_float epsilon = 9.99999994e-09,
                               mx_float wd = 0,
                               mx_float clip_gradient = -1) {
  return Operator("_mp_adamw_update")
           .SetParam("lr", lr)
           .SetParam("eta", eta)
           .SetParam("beta1", beta1)
           .SetParam("beta2", beta2)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mean", mean)
           .SetInput("var", var)
           .SetInput("weight32", weight32)
           .SetInput("rescale_grad", rescale_grad)
           .CreateSymbol();
}

/*!
 * \brief Update function for AdamW optimizer. AdamW is seen as a modification of
 *        Adam by decoupling the weight decay from the optimization steps taken w.r.t.
 *
 *        Adam update consists of the following steps, where g represents gradient and m,
 *        are 1st and 2nd order moment estimates (mean and variance).
 *
 *        .. math::
 *
 *        g_t = \nabla J(W_{t-1})\\
 *        m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
 *        v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
 *        W_t = W_{t-1} - \eta_t (\alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon } + wd
 *
 *        It updates the weights using::
 *
 *        m = beta1*m + (1-beta1)*grad
 *        v = beta2*v + (1-beta2)*(grad**2)
 *        w -= eta * (learning_rate * m / (sqrt(v) + epsilon) + w * wd)
 *
 *        Note that gradient is rescaled to grad = rescale_grad * grad. If rescale_grad
 *        the update is skipped.
 *
 *
 *        Defined in src/operator/contrib/adamw.cc:L120
 * \param weight Weight
 * \param grad Gradient
 * \param mean Moving mean
 * \param var Moving variance
 * \param rescale_grad Rescale gradient to rescale_grad * grad. If NaN, Inf, or 0, the
 * \param lr Learning rate
 * \param eta Learning rate schedule multiplier
 * \param beta1 The decay rate for the 1st moment estimates.
 * \param beta2 The decay rate for the 2nd moment estimates.
 * \param epsilon A small constant for numerical stability.
 * \param wd Weight decay augments the objective function with a regularization term that
 *        penalizes large weights. The penalty scales with the square of the magnitude of
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \return new symbol
 */
inline Symbol _adamw_update(Symbol weight,
                            Symbol grad,
                            Symbol mean,
                            Symbol var,
                            Symbol rescale_grad,
                            mx_float lr,
                            mx_float eta,
                            mx_float beta1 = 0.899999976,
                            mx_float beta2 = 0.999000013,
                            mx_float epsilon = 9.99999994e-09,
                            mx_float wd = 0,
                            mx_float clip_gradient = -1) {
  return Operator("_adamw_update")
           .SetParam("lr", lr)
           .SetParam("eta", eta)
           .SetParam("beta1", beta1)
           .SetParam("beta2", beta2)
           .SetParam("epsilon", epsilon)
           .SetParam("wd", wd)
           .SetParam("clip_gradient", clip_gradient)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("mean", mean)
           .SetInput("var", var)
           .SetInput("rescale_grad", rescale_grad)
           .CreateSymbol();
}

/*!
 * \brief
 *        Perform 2D resizing (upsampling or downsampling) for 4D input using bilinear
 *
 *        Expected input is a 4 dimensional NDArray (NCHW) and the output
 *        with the shape of (N x C x height x width).
 *        The key idea of bilinear interpolation is to perform linear interpolation
 *        first in one direction, and then again in the other direction. See the
 *        `Bilinear interpolation
 *        for more details.
 *
 *
 *        Defined in src/operator/contrib/bilinear_resize.cc:L193
 * \param data Input data
 * \param like Resize data to it's shape
 * \param height output height (required, but ignored if scale_height is defined or mode
 * \param width output width (required, but ignored if scale_width is defined or mode is
 * \param scale_height sampling scale of the height (optional, used in modes "scale" and
 * \param scale_width sampling scale of the width (optional, used in modes "scale" and
 * \param mode resizing mode. "simple" - output height equals parameter "height" if
 *        "scale_height" parameter is not defined or input height multiplied by
 *        "scale_height" otherwise. Same for width;"odd_scale" - if original height or
 *        width is odd, then result height is calculated like result_h = (original_h - 1)
 *        * scale + 1; for scale > 1 the result shape would be like if we did
 *        deconvolution with kernel = (1, 1) and stride = (height_scale, width_scale);
 *        and for scale < 1 shape would be like we did convolution with kernel = (1, 1)
 *        and stride = (int(1 / height_scale), int( 1/ width_scale);"like" - resize first
 *        input to the height and width of second input; "to_even_down" - resize input to
 *        nearest lower even height and width (if original height is odd then result
 *        height = original height - 1);"to_even_up" - resize input to nearest bigger
 *        even height and width (if original height is odd then result height = original
 *        height + 1);"to_odd_down" - resize input to nearest odd height and width (if
 *        original height is odd then result height = original height - 1);"to_odd_up" -
 *        resize input to nearest odd height and width (if original height is odd then
 * \return new symbol
 */
inline Symbol _contrib_BilinearResize2D(Symbol data,
                                        Symbol like,
                                        int height = 1,
                                        int width = 1,
                                        mx_float scale_height = mx_float(),
                                        mx_float scale_width = mx_float(),
                                        _contrib_BilinearResize2DMode mode = _contrib_BilinearResize2DMode::kSize) {
  static const char *_contrib_BilinearResize2DModeValues[] = {
    "like",
    "odd_scale",
    "size",
    "to_even_down",
    "to_even_up",
    "to_odd_down",
    "to_odd_up"
  };
  return Operator("_contrib_BilinearResize2D")
           .SetParam("height", height)
           .SetParam("width", width)
           .SetParam("scale_height", scale_height)
           .SetParam("scale_width", scale_width)
           .SetParam("mode", _contrib_BilinearResize2DModeValues[int(mode)])
           .SetInput("data", data)
           .SetInput("like", like)
           .CreateSymbol();
}

/*!
 * \brief This operators implements the quadratic function.
 *
 *        .. math::
 *        f(x) = ax^2+bx+c
 *
 *        where :math:`x` is an input tensor and all operations
 *        in the function are element-wise.
 *
 *        Example::
 *
 *        x = [[1, 2], [3, 4]]
 *        y = quadratic(data=x, a=1, b=2, c=3)
 *        y = [[6, 11], [18, 27]]
 *
 *        The storage type of ``quadratic`` output depends on storage types of inputs
 *        - quadratic(csr, a, b, 0) = csr
 *        - quadratic(default, a, b, c) = default
 *
 *
 *
 *        Defined in src/operator/contrib/quadratic_op.cc:L50
 * \param data Input ndarray
 * \param a Coefficient of the quadratic term in the quadratic function.
 * \param b Coefficient of the linear term in the quadratic function.
 * \param c Constant term in the quadratic function.
 * \return new symbol
 */
inline Symbol _contrib_quadratic(Symbol data,
                                 mx_float a = 0,
                                 mx_float b = 0,
                                 mx_float c = 0) {
  return Operator("_contrib_quadratic")
           .SetParam("a", a)
           .SetParam("b", b)
           .SetParam("c", c)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 * \return new symbol
 */
inline Symbol _contrib_backward_quadratic() {
  return Operator("_contrib_backward_quadratic")
           .CreateSymbol();
}

/*!
 * \brief Rescale the input by the square root of the channel dimension.
 *
 *        out = data / sqrt(data.shape[-1])
 *
 *
 *
 *        Defined in src/operator/contrib/transformer.cc:L38
 * \param data The input array.
 * \return new symbol
 */
inline Symbol _contrib_div_sqrt_dim(Symbol data) {
  return Operator("_contrib_div_sqrt_dim")
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Returns an array of indexes of the input array.
 *
 *        For an input array with shape  :math:`(d_1, d_2, ..., d_n)`, `index_array`
 *        :math:`(d_1, d_2, ..., d_n, n)` array `idx`, where
 *        :math:`idx[i_1, i_2, ..., i_n, :] = [i_1, i_2, ..., i_n]`.
 *
 *        Additionally, when the parameter `axes` is specified, `idx` will be a
 *        :math:`(d_1, d_2, ..., d_n, m)` array where `m` is the length of `axes`, and
 *        equality will hold: :math:`idx[i_1, i_2, ..., i_n, j] = i_{axes[j]}`.
 *
 *        Examples::
 *
 *        x = mx.nd.ones((3, 2))
 *
 *        mx.nd.contrib.index_array(x) = [[[0 0]
 *        [0 1]]
 *
 *        [[1 0]
 *        [1 1]]
 *
 *        [[2 0]
 *        [2 1]]]
 *
 *        x = mx.nd.ones((3, 2, 2))
 *
 *        mx.nd.contrib.index_array(x, axes=(1, 0)) = [[[[0 0]
 *        [0 0]]
 *
 *        [[1 0]
 *        [1 0]]]
 *
 *
 *        [[[0 1]
 *        [0 1]]
 *
 *        [[1 1]
 *        [1 1]]]
 *
 *
 *        [[[0 2]
 *        [0 2]]
 *
 *        [[1 2]
 *        [1 2]]]]
 *
 *
 *
 *        Defined in src/operator/contrib/index_array.cc:L118
 * \param data Input data
 * \param axes The axes to include in the index array. Supports negative values.
 * \return new symbol
 */
inline Symbol _contrib_index_array(Symbol data,
                                   dmlc::optional<Shape> axes = dmlc::optional<Shape>()) {
  return Operator("_contrib_index_array")
           .SetParam("axes", axes)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Copies the elements of a `new_tensor` into the `old_tensor`.
 *
 *        This operator copies the elements by selecting the indices in the order given
 *        The output will be a new tensor containing the rest elements of old tensor and
 *        the copied elements of new tensor.
 *        For example, if `index[i] == j`, then the `i` th row of `new_tensor` is copied
 *        `j` th row of output.
 *
 *        The `index` must be a vector and it must have the same size with the `0` th
 *        `new_tensor`. Also, the `0` th dimension of old_tensor must `>=` the `0` th
 *        `new_tensor`, or an error will be raised.
 *
 *        Examples::
 *
 *        x = mx.nd.zeros((5,3))
 *        t = mx.nd.array([[1,2,3],[4,5,6],[7,8,9]])
 *        index = mx.nd.array([0,4,2])
 *
 *        mx.nd.contrib.index_copy(x, index, t)
 *
 *        [[1. 2. 3.]
 *        [0. 0. 0.]
 *        [7. 8. 9.]
 *        [0. 0. 0.]
 *        [4. 5. 6.]]
 *        <NDArray 5x3 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/index_copy.cc:L183
 * \param old_tensor Old tensor
 * \param index_vector Index vector
 * \param new_tensor New tensor to be copied
 * \return new symbol
 */
inline Symbol _contrib_index_copy(Symbol old_tensor,
                                  Symbol index_vector,
                                  Symbol new_tensor) {
  return Operator("_contrib_index_copy")
           .SetInput("old_tensor", old_tensor)
           .SetInput("index_vector", index_vector)
           .SetInput("new_tensor", new_tensor)
           .CreateSymbol();
}

/*!
 * \brief
 * \return new symbol
 */
inline Symbol _contrib_backward_index_copy() {
  return Operator("_contrib_backward_index_copy")
           .CreateSymbol();
}

/*!
 * \brief
 *        This operator takes a 4D feature map as an input array and region proposals as
 *        then align the feature map over sub-regions of input and produces a fixed-sized
 *        This operator is typically used in Faster R-CNN & Mask R-CNN networks.
 *
 *        Different from ROI pooling, ROI Align removes the harsh quantization, properly
 *        the extracted features with the input. RoIAlign computes the value of each
 *        by bilinear interpolation from the nearby grid points on the feature map. No
 *        performed on any coordinates involved in the RoI, its bins, or the sampling
 *        Bilinear interpolation is used to compute the exact values of the
 *        input features at four regularly sampled locations in each RoI bin.
 *        Then the feature map can be aggregated by avgpooling.
 *
 *
 *        References
 *        ----------
 *
 *        He, Kaiming, et al. "Mask R-CNN." ICCV, 2017
 *
 *
 *        Defined in src/operator/contrib/roi_align.cc:L538
 * \param data Input data to the pooling operator, a 4D Feature maps
 * \param rois Bounding box coordinates, a 2D array
 * \param pooled_size ROI Align output roi feature map height and width: (h, w)
 * \param spatial_scale Ratio of input feature map height (or w) to raw image height (or
 * \param sample_ratio Optional sampling ratio of ROI align, using adaptive size by
 * \param position_sensitive Whether to perform position-sensitive RoI pooling.
 *        PSRoIPooling is first proposaled by R-FCN and it can reduce the input channels
 * \return new symbol
 */
inline Symbol _contrib_ROIAlign(Symbol data,
                                Symbol rois,
                                Shape pooled_size,
                                mx_float spatial_scale,
                                int sample_ratio = -1,
                                bool position_sensitive = false) {
  return Operator("_contrib_ROIAlign")
           .SetParam("pooled_size", pooled_size)
           .SetParam("spatial_scale", spatial_scale)
           .SetParam("sample_ratio", sample_ratio)
           .SetParam("position_sensitive", position_sensitive)
           .SetInput("data", data)
           .SetInput("rois", rois)
           .CreateSymbol();
}

/*!
 * \brief Check if all the float numbers in the array are finite (used for AMP)
 *
 *
 *        Defined in src/operator/contrib/all_finite.cc:L101
 * \param data Array
 * \param init_output Initialize output to 1.
 * \return new symbol
 */
inline Symbol all_finite(Symbol data,
                         bool init_output = true) {
  return Operator("all_finite")
           .SetParam("init_output", init_output)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Check if all the float numbers in all the arrays are finite (used for AMP)
 *
 *
 *        Defined in src/operator/contrib/all_finite.cc:L133
 * \param data Arrays
 * \param num_arrays Number of arrays.
 * \param init_output Initialize output to 1.
 * \return new symbol
 */
inline Symbol multi_all_finite(const std::vector<Symbol>& data,
                               int num_arrays = 1,
                               bool init_output = true) {
  return Operator("multi_all_finite")
           .SetParam("num_arrays", num_arrays)
           .SetParam("init_output", init_output)
(data)
           .CreateSymbol();
}

/*!
 * \brief Update function for Group AdaGrad optimizer.
 *
 *        Referenced from *Adaptive Subgradient Methods for Online Learning and
 *        and available at http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf but
 *        uses only a single learning rate for every row of the parameter array.
 *
 *        Updates are applied by::
 *
 *        grad = clip(grad * rescale_grad, clip_gradient)
 *        history += mean(square(grad), axis=1, keepdims=True)
 *        div = grad / sqrt(history + float_stable_eps)
 *        weight -= div * lr
 *
 *        Weights are updated lazily if the gradient is sparse.
 *
 *        Note that non-zero values for the weight decay option are not supported.
 *
 *
 *
 *        Defined in src/operator/contrib/optimizer_op.cc:L71
 * \param weight Weight
 * \param grad Gradient
 * \param history History
 * \param lr Learning rate
 * \param rescale_grad Rescale gradient to grad = rescale_grad*grad.
 * \param clip_gradient Clip gradient to the range of [-clip_gradient, clip_gradient] If
 *        clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad,
 * \param epsilon Epsilon for numerical stability
 * \return new symbol
 */
inline Symbol _contrib_group_adagrad_update(Symbol weight,
                                            Symbol grad,
                                            Symbol history,
                                            mx_float lr,
                                            mx_float rescale_grad = 1,
                                            mx_float clip_gradient = -1,
                                            mx_float epsilon = 9.99999975e-06) {
  return Operator("_contrib_group_adagrad_update")
           .SetParam("lr", lr)
           .SetParam("rescale_grad", rescale_grad)
           .SetParam("clip_gradient", clip_gradient)
           .SetParam("epsilon", epsilon)
           .SetInput("weight", weight)
           .SetInput("grad", grad)
           .SetInput("history", history)
           .CreateSymbol();
}

/*!
 * \brief
 *        Given an n-d NDArray data, and a 1-d NDArray index,
 *        the operator produces an un-predeterminable shaped n-d NDArray out,
 *        which stands for the rows in x where the corresonding element in index is
 *
 *        >>> data = mx.nd.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
 *        >>> index = mx.nd.array([0, 1, 0])
 *        >>> out = mx.nd.contrib.boolean_mask(data, index)
 *        >>> out
 *
 *        [[4. 5. 6.]]
 *        <NDArray 1x3 @cpu(0)>
 *
 *
 *
 *        Defined in src/operator/contrib/boolean_mask.cc:L211
 * \param data Data
 * \param index Mask
 * \param axis An integer that represents the axis in NDArray to mask from.
 * \return new symbol
 */
inline Symbol _contrib_boolean_mask(Symbol data,
                                    Symbol index,
                                    int axis = 0) {
  return Operator("_contrib_boolean_mask")
           .SetParam("axis", axis)
           .SetInput("data", data)
           .SetInput("index", index)
           .CreateSymbol();
}

/*!
 * \brief Apply non-maximum suppression to input.
 *
 *        The output will be sorted in descending order according to `score`. Boxes with
 *        overlaps larger than `overlap_thresh`, smaller scores and background boxes
 *        will be removed and filled with -1, the corresponding position will be recorded
 *        for backward propogation.
 *
 *        During back-propagation, the gradient will be copied to the original
 *        position according to the input index. For positions that have been suppressed,
 *        the in_grad will be assigned 0.
 *        In summary, gradients are sticked to its boxes, will either be moved or
 *        according to its original index in input.
 *
 *        Input requirements::
 *
 *        1. Input tensor have at least 2 dimensions, (n, k), any higher dims will be
 *        as batch, e.g. (a, b, c, d, n, k) == (a*b*c*d, n, k)
 *        2. n is the number of boxes in each batch
 *        3. k is the width of each box item.
 *
 *        By default, a box is [id, score, xmin, ymin, xmax, ymax, ...],
 *        additional elements are allowed.
 *
 *        - `id_index`: optional, use -1 to ignore, useful if `force_suppress=False`,
 *        we will skip highly overlapped boxes if one is `apple` while the other is `car`.
 *
 *        - `background_id`: optional, default=-1, class id for background boxes, useful
 *        when `id_index >= 0` which means boxes with background id will be filtered
 *
 *        - `coord_start`: required, default=2, the starting index of the 4 coordinates.
 *        Two formats are supported:
 *
 *        - `corner`: [xmin, ymin, xmax, ymax]
 *
 *        - `center`: [x, y, width, height]
 *
 *        - `score_index`: required, default=1, box score/confidence.
 *        When two boxes overlap IOU > `overlap_thresh`, the one with smaller score will
 *
 *        - `in_format` and `out_format`: default='corner', specify in/out box formats.
 *
 *        Examples::
 *
 *        x = [[0, 0.5, 0.1, 0.1, 0.2, 0.2], [1, 0.4, 0.1, 0.1, 0.2, 0.2],
 *        [0, 0.3, 0.1, 0.1, 0.14, 0.14], [2, 0.6, 0.5, 0.5, 0.7, 0.8]]
 *        box_nms(x, overlap_thresh=0.1, coord_start=2, score_index=1, id_index=0,
 *        force_suppress=True, in_format='corner', out_typ='corner') =
 *        [[2, 0.6, 0.5, 0.5, 0.7, 0.8], [0, 0.5, 0.1, 0.1, 0.2, 0.2],
 *        [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]
 *        out_grad = [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
 *        [0.3, 0.3, 0.3, 0.3, 0.3, 0.3], [0.4, 0.4, 0.4, 0.4, 0.4, 0.4]]
 *        # exe.backward
 *        in_grad = [[0.2, 0.2, 0.2, 0.2, 0.2, 0.2], [0, 0, 0, 0, 0, 0],
 *        [0, 0, 0, 0, 0, 0], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]]
 *
 *
 *
 *        Defined in src/operator/contrib/bounding_box.cc:L93
 * \param data The input
 * \param overlap_thresh Overlapping(IoU) threshold to suppress object with smaller score.
 * \param valid_thresh Filter input boxes to those whose scores greater than valid_thresh.
 * \param topk Apply nms to topk boxes with descending scores, -1 to no restriction.
 * \param coord_start Start index of the consecutive 4 coordinates.
 * \param score_index Index of the scores/confidence of boxes.
 * \param id_index Optional, index of the class categories, -1 to disable.
 * \param background_id Optional, id of the background class which will be ignored in nms.
 * \param force_suppress Optional, if set false and id_index is provided, nms will only
 * \param in_format The input box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 * \param out_format The output box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 * \return new symbol
 */
inline Symbol _contrib_box_nms(Symbol data,
                               mx_float overlap_thresh = 0.5,
                               mx_float valid_thresh = 0,
                               int topk = -1,
                               int coord_start = 2,
                               int score_index = 1,
                               int id_index = -1,
                               int background_id = -1,
                               bool force_suppress = false,
                               _contrib_box_nmsInFormat in_format = _contrib_box_nmsInFormat::kCorner,
                               _contrib_box_nmsOutFormat out_format = _contrib_box_nmsOutFormat::kCorner) {
  static const char *_contrib_box_nmsInFormatValues[] = {
    "center",
    "corner"
  };
  static const char *_contrib_box_nmsOutFormatValues[] = {
    "center",
    "corner"
  };
  return Operator("_contrib_box_nms")
           .SetParam("overlap_thresh", overlap_thresh)
           .SetParam("valid_thresh", valid_thresh)
           .SetParam("topk", topk)
           .SetParam("coord_start", coord_start)
           .SetParam("score_index", score_index)
           .SetParam("id_index", id_index)
           .SetParam("background_id", background_id)
           .SetParam("force_suppress", force_suppress)
           .SetParam("in_format", _contrib_box_nmsInFormatValues[int(in_format)])
           .SetParam("out_format", _contrib_box_nmsOutFormatValues[int(out_format)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Bounding box overlap of two arrays.
 *        The overlap is defined as Intersection-over-Union, aka, IOU.
 *        - lhs: (a_1, a_2, ..., a_n, 4) array
 *        - rhs: (b_1, b_2, ..., b_n, 4) array
 *        - output: (a_1, a_2, ..., a_n, b_1, b_2, ..., b_n) array
 *
 *        Note::
 *
 *        Zero gradients are back-propagated in this op for now.
 *
 *        Example::
 *
 *        x = [[0.5, 0.5, 1.0, 1.0], [0.0, 0.0, 0.5, 0.5]]
 *        y = [[0.25, 0.25, 0.75, 0.75]]
 *        box_iou(x, y, format='corner') = [[0.1428], [0.1428]]
 *
 *
 *
 *        Defined in src/operator/contrib/bounding_box.cc:L134
 * \param lhs The first input
 * \param rhs The second input
 * \param format The box encoding type.
 *        "corner" means boxes are encoded as [xmin, ymin, xmax, ymax], "center" means
 * \return new symbol
 */
inline Symbol _contrib_box_iou(Symbol lhs,
                               Symbol rhs,
                               _contrib_box_iouFormat format = _contrib_box_iouFormat::kCorner) {
  static const char *_contrib_box_iouFormatValues[] = {
    "center",
    "corner"
  };
  return Operator("_contrib_box_iou")
           .SetParam("format", _contrib_box_iouFormatValues[int(format)])
           .SetInput("lhs", lhs)
           .SetInput("rhs", rhs)
           .CreateSymbol();
}

/*!
 * \brief Compute bipartite matching.
 *        The matching is performed on score matrix with shape [B, N, M]
 *        - B: batch_size
 *        - N: number of rows to match
 *        - M: number of columns as reference to be matched against.
 *
 *        Returns:
 *        x : matched column indices. -1 indicating non-matched elements in rows.
 *        y : matched row indices.
 *
 *        Note::
 *
 *        Zero gradients are back-propagated in this op for now.
 *
 *        Example::
 *
 *        s = [[0.5, 0.6], [0.1, 0.2], [0.3, 0.4]]
 *        x, y = bipartite_matching(x, threshold=1e-12, is_ascend=False)
 *        x = [1, -1, 0]
 *        y = [2, 0]
 *
 *
 *
 *        Defined in src/operator/contrib/bounding_box.cc:L180
 * \param data The input
 * \param threshold Ignore matching when score < thresh, if is_ascend=false, or ignore
 * \param is_ascend Use ascend order for scores instead of descending. Please set
 * \param topk Limit the number of matches to topk, set -1 for no limit
 * \return new symbol
 */
inline Symbol _contrib_bipartite_matching(Symbol data,
                                          mx_float threshold,
                                          bool is_ascend = false,
                                          int topk = -1) {
  return Operator("_contrib_bipartite_matching")
           .SetParam("threshold", threshold)
           .SetParam("is_ascend", is_ascend)
           .SetParam("topk", topk)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 *        Applies a 2D adaptive average pooling over a 4D input with the shape of (NCHW).
 *        The pooling kernel and stride sizes are automatically chosen for desired output
 *
 *        - If a single integer is provided for output_size, the output size is \
 *        (N x C x output_size x output_size) for any input (NCHW).
 *
 *        - If a tuple of integers (height, width) are provided for output_size, the
 *        (N x C x height x width) for any input (NCHW).
 *
 *
 *
 *        Defined in src/operator/contrib/adaptive_avg_pooling.cc:L214
 * \param data Input data
 * \param output_size int (output size) or a tuple of int for output (height, width).
 * \return new symbol
 */
inline Symbol _contrib_AdaptiveAvgPooling2D(Symbol data,
                                            Shape output_size = {}) {
  return Operator("_contrib_AdaptiveAvgPooling2D")
           .SetParam("output_size", output_size)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief
 *        Calculate the mean and variance of `data`.
 *
 *        The mean and variance are calculated by aggregating the contents of data across
 *        If x is 1-D and axes = [0] this is just the mean and variance of a vector.
 *
 *        Example:
 *
 *        x = [[1, 2, 3], [4, 5, 6]]
 *        mean, var = moments(data=x, axes=[0])
 *        mean = [2.5, 3.5, 4.5]
 *        var = [2.25, 2.25, 2.25]
 *        mean, var = moments(data=x, axes=[1])
 *        mean = [2.0, 5.0]
 *        var = [0.66666667, 0.66666667]
 *        mean, var = moments(data=x, axis=[0, 1])
 *        mean = [3.5]
 *        var = [2.9166667]
 *
 *
 *
 *        Defined in src/operator/nn/moments.cc:L54
 * \param data Input ndarray
 * \param axes Array of ints. Axes along which to compute mean and variance.
 * \param keepdims produce moments with the same dimensionality as the input.
 * \return new symbol
 */
inline Symbol moments(Symbol data,
                      dmlc::optional<Shape> axes = dmlc::optional<Shape>(),
                      bool keepdims = false) {
  return Operator("moments")
           .SetParam("axes", axes)
           .SetParam("keepdims", keepdims)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Applies the softmax function.
 *
 *        The resulting array contains elements in the range (0,1) and the elements along
 *
 *        .. math::
 *        softmax(\mathbf{z/t})_j = \frac{e^{z_j/t}}{\sum_{k=1}^K e^{z_k/t}}
 *
 *        for :math:`j = 1, ..., K`
 *
 *        t is the temperature parameter in softmax function. By default, t equals 1.0
 *
 *        Example::
 *
 *        x = [[ 1.  1.  1.]
 *        [ 1.  1.  1.]]
 *
 *        softmax(x,axis=0) = [[ 0.5  0.5  0.5]
 *        [ 0.5  0.5  0.5]]
 *
 *        softmax(x,axis=1) = [[ 0.33333334,  0.33333334,  0.33333334],
 *        [ 0.33333334,  0.33333334,  0.33333334]]
 *
 *
 *
 *        Defined in src/operator/nn/softmax.cc:L93
 * \param data The input array.
 * \param axis The axis along which to compute softmax.
 * \param temperature Temperature parameter in softmax
 * \param dtype DType of the output in case this can't be inferred. Defaults to the same
 * \return new symbol
 */
inline Symbol softmax(Symbol data,
                      int axis = -1,
                      dmlc::optional<double> temperature = dmlc::optional<double>(),
                      SoftmaxDtype dtype = SoftmaxDtype::kNone) {
  static const char *SoftmaxDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("softmax")
           .SetParam("axis", axis)
           .SetParam("temperature", temperature)
           .SetParam("dtype", SoftmaxDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Applies the softmin function.
 *
 *        The resulting array contains elements in the range (0,1) and the elements along
 *        up to 1.
 *
 *        .. math::
 *        softmin(\mathbf{z/t})_j = \frac{e^{-z_j/t}}{\sum_{k=1}^K e^{-z_k/t}}
 *
 *        for :math:`j = 1, ..., K`
 *
 *        t is the temperature parameter in softmax function. By default, t equals 1.0
 *
 *        Example::
 *
 *        x = [[ 1.  2.  3.]
 *        [ 3.  2.  1.]]
 *
 *        softmin(x,axis=0) = [[ 0.88079703,  0.5,  0.11920292],
 *        [ 0.11920292,  0.5,  0.88079703]]
 *
 *        softmin(x,axis=1) = [[ 0.66524094,  0.24472848,  0.09003057],
 *        [ 0.09003057,  0.24472848,  0.66524094]]
 *
 *
 *
 *        Defined in src/operator/nn/softmax.cc:L153
 * \param data The input array.
 * \param axis The axis along which to compute softmax.
 * \param temperature Temperature parameter in softmax
 * \param dtype DType of the output in case this can't be inferred. Defaults to the same
 * \return new symbol
 */
inline Symbol softmin(Symbol data,
                      int axis = -1,
                      dmlc::optional<double> temperature = dmlc::optional<double>(),
                      SoftminDtype dtype = SoftminDtype::kNone) {
  static const char *SoftminDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("softmin")
           .SetParam("axis", axis)
           .SetParam("temperature", temperature)
           .SetParam("dtype", SoftminDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes the log softmax of the input.
 *        This is equivalent to computing softmax followed by log.
 *
 *        Examples::
 *
 *        >>> x = mx.nd.array([1, 2, .1])
 *        >>> mx.nd.log_softmax(x).asnumpy()
 *        array([-1.41702998, -0.41702995, -2.31702995], dtype=float32)
 *
 *        >>> x = mx.nd.array( [[1, 2, .1],[.1, 2, 1]] )
 *        >>> mx.nd.log_softmax(x, axis=0).asnumpy()
 *        array([[-0.34115392, -0.69314718, -1.24115396],
 *        [-1.24115396, -0.69314718, -0.34115392]], dtype=float32)
 *
 *
 *
 * \param data The input array.
 * \param axis The axis along which to compute softmax.
 * \param temperature Temperature parameter in softmax
 * \param dtype DType of the output in case this can't be inferred. Defaults to the same
 * \return new symbol
 */
inline Symbol log_softmax(Symbol data,
                          int axis = -1,
                          dmlc::optional<double> temperature = dmlc::optional<double>(),
                          Log_softmaxDtype dtype = Log_softmaxDtype::kNone) {
  static const char *Log_softmaxDtypeValues[] = {
    "None",
    "float16",
    "float32",
    "float64"
  };
  return Operator("log_softmax")
           .SetParam("axis", axis)
           .SetParam("temperature", temperature)
           .SetParam("dtype", Log_softmaxDtypeValues[int(dtype)])
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Computes 1D or 2D transposed convolution (aka fractionally strided convolution)
 *        of the input tensor. This operation can be seen as the gradient of Convolution
 *        operation with respect to its input. Convolution usually reduces the size of
 *        the input. Transposed convolution works the other way, going from a smaller
 * \param data Input tensor to the deconvolution operation.
 * \param weight Weights representing the kernel.
 * \param bias Bias added to the result after the deconvolution operation.
 * \param kernel Deconvolution kernel size: (w,), (h, w) or (d, h, w). This is same as
 * \param num_filter Number of output filters.
 * \param stride The stride used for the corresponding convolution: (w,), (h, w) or (d,
 * \param dilate Dilation factor for each dimension of the input: (w,), (h, w) or (d, h,
 * \param pad The amount of implicit zero padding added during convolution for each
 *        dimension of the input: (w,), (h, w) or (d, h, w). ``(kernel-1)/2`` is usually
 *        a good choice. If `target_shape` is set, `pad` will be ignored and a padding
 * \param adj Adjustment for output shape: (w,), (h, w) or (d, h, w). If `target_shape`
 * \param target_shape Shape of the output tensor: (w,), (h, w) or (d, h, w).
 * \param num_group Number of groups partition.
 * \param workspace Maximum temporary workspace allowed (MB) in deconvolution.This
 *        parameter has two usages. When CUDNN is not used, it determines the effective
 *        batch size of the deconvolution kernel. When CUDNN is used, it controls the
 *        maximum temporary storage used for tuning the best CUDNN kernel when
 * \param no_bias Whether to disable bias parameter.
 * \param cudnn_tune Whether to pick convolution algorithm by running performance test.
 * \param cudnn_off Turn off cudnn for this layer.
 * \param layout Set layout for input, output and weight. Empty for default layout, NCW
 * \return new symbol
 */
inline Symbol Deconvolution(Symbol data,
                            Symbol weight,
                            Symbol bias,
                            Shape kernel,
                            uint32_t num_filter,
                            Shape stride = {},
                            Shape dilate = {},
                            Shape pad = {},
                            Shape adj = {},
                            Shape target_shape = {},
                            uint32_t num_group = 1,
                            uint64_t workspace = 512,
                            bool no_bias = true,
                            DeconvolutionCudnnTune cudnn_tune = DeconvolutionCudnnTune::kNone,
                            bool cudnn_off = false,
                            DeconvolutionLayout layout = DeconvolutionLayout::kNone) {
  static const char *DeconvolutionCudnnTuneValues[] = {
    "None",
    "fastest",
    "limited_workspace",
    "off"
  };
  static const char *DeconvolutionLayoutValues[] = {
    "None",
    "NCDHW",
    "NCHW",
    "NCW",
    "NDHWC",
    "NHWC"
  };
  return Operator("Deconvolution")
           .SetParam("kernel", kernel)
           .SetParam("num_filter", num_filter)
           .SetParam("stride", stride)
           .SetParam("dilate", dilate)
           .SetParam("pad", pad)
           .SetParam("adj", adj)
           .SetParam("target_shape", target_shape)
           .SetParam("num_group", num_group)
           .SetParam("workspace", workspace)
           .SetParam("no_bias", no_bias)
           .SetParam("cudnn_tune", DeconvolutionCudnnTuneValues[int(cudnn_tune)])
           .SetParam("cudnn_off", cudnn_off)
           .SetParam("layout", DeconvolutionLayoutValues[int(layout)])
           .SetInput("data", data)
           .SetInput("weight", weight)
           .SetInput("bias", bias)
           .CreateSymbol();
}

/*!
 * \brief Upsamples the given input data.
 *
 *        Two algorithms (``sample_type``) are available for upsampling:
 *
 *        - Nearest Neighbor
 *        - Bilinear
 *
 *        **Nearest Neighbor Upsampling**
 *
 *        Input data is expected to be NCHW.
 *
 *        Example::
 *
 *        x = [[[[1. 1. 1.]
 *        [1. 1. 1.]
 *        [1. 1. 1.]]]]
 *
 *        UpSampling(x, scale=2, sample_type='nearest') = [[[[1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]
 *        [1. 1. 1. 1. 1. 1.]]]]
 *
 *        **Bilinear Upsampling**
 *
 *        Uses `deconvolution` algorithm under the hood. You need provide both input data
 *
 *        Input data is expected to be NCHW.
 *
 *        `num_filter` is expected to be same as the number of channels.
 *
 *        Example::
 *
 *        x = [[[[1. 1. 1.]
 *        [1. 1. 1.]
 *        [1. 1. 1.]]]]
 *
 *        w = [[[[1. 1. 1. 1.]
 *        [1. 1. 1. 1.]
 *        [1. 1. 1. 1.]
 *        [1. 1. 1. 1.]]]]
 *
 *        UpSampling(x, w, scale=2, sample_type='bilinear', num_filter=1) = [[[[1. 2. 2.
 *        [2. 4. 4. 4. 4. 2.]
 *        [2. 4. 4. 4. 4. 2.]
 *        [2. 4. 4. 4. 4. 2.]
 *        [2. 4. 4. 4. 4. 2.]
 *        [1. 2. 2. 2. 2. 1.]]]]
 *
 *
 *        Defined in src/operator/nn/upsampling.cc:L173
 * \param data Array of tensors to upsample. For bilinear upsampling, there should be 2
 * \param scale Up sampling scale
 * \param sample_type upsampling method
 * \param num_args Number of inputs to be upsampled. For nearest neighbor upsampling,
 *        this can be 1-N; the size of output will be(scale*h_0,scale*w_0) and all other
 *        inputs will be upsampled to thesame size. For bilinear upsampling this must be
 * \param num_filter Input filter. Only used by bilinear sample_type.Since bilinear
 * \param multi_input_mode How to handle multiple input. concat means concatenate
 *        upsampled images along the channel dimension. sum means add all images
 * \param workspace Tmp workspace for deconvolution (MB)
 * \return new symbol
 */
inline Symbol UpSampling(const std::vector<Symbol>& data,
                         int scale,
                         UpSamplingSampleType sample_type,
                         int num_args,
                         int num_filter = 0,
                         UpSamplingMultiInputMode multi_input_mode = UpSamplingMultiInputMode::kConcat,
                         uint64_t workspace = 512) {
  static const char *UpSamplingSampleTypeValues[] = {
    "bilinear",
    "nearest"
  };
  static const char *UpSamplingMultiInputModeValues[] = {
    "concat",
    "sum"
  };
  return Operator("UpSampling")
           .SetParam("scale", scale)
           .SetParam("sample_type", UpSamplingSampleTypeValues[int(sample_type)])
           .SetParam("num_args", num_args)
           .SetParam("num_filter", num_filter)
           .SetParam("multi_input_mode", UpSamplingMultiInputModeValues[int(multi_input_mode)])
           .SetParam("workspace", workspace)
(data)
           .CreateSymbol();
}

/*!
 * \brief Batch normalization.
 *
 *        Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
 *        well as offset ``beta``.
 *
 *        Assume the input has more than one dimension and we normalize along axis 1.
 *        We first compute the mean and variance along this axis:
 *
 *        .. math::
 *
 *        data\_mean[i] = mean(data[:,i,:,...]) \\
 *        data\_var[i] = var(data[:,i,:,...])
 *
 *        Then compute the normalized output, which has the same shape as input, as
 *
 *        .. math::
 *
 *        out[:,i,:,...] = \frac{data[:,i,:,...] -
 *
 *        Both *mean* and *var* returns a scalar by treating the input as a vector.
 *
 *        Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 *        have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
 *        the inverse of ``data_var``, which are needed for the backward pass. Note that
 *        two outputs are blocked.
 *
 *        Besides the inputs and the outputs, this operator accepts two auxiliary
 *        states, ``moving_mean`` and ``moving_var``, which are *k*-length
 *        vectors. They are global statistics for the whole dataset, which are updated
 *        by::
 *
 *        moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
 *        moving_var = moving_var * momentum + data_var * (1 - momentum)
 *
 *        If ``use_global_stats`` is set to be true, then ``moving_mean`` and
 *        ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
 *        the output. It is often used during inference.
 *
 *        The parameter ``axis`` specifies which axis of the input shape denotes
 *        the 'channel' (separately normalized groups).  The default is 1.  Specifying -1
 *        axis to be the last item in the input shape.
 *
 *        Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
 *        then set ``gamma`` to 1 and its gradient to 0.
 *
 *        .. Note::
 *        When ``fix_gamma`` is set to True, no sparse support is provided. If
 *        the sparse tensors will fallback.
 *
 *
 *
 *        Defined in src/operator/nn/batch_norm.cc:L572
 * \param data Input data to batch normalization
 * \param gamma gamma array
 * \param beta beta array
 * \param moving_mean running mean of input
 * \param moving_var running variance of input
 * \param eps Epsilon to prevent div 0. Must be no less than CUDNN_BN_MIN_EPSILON defined
 * \param momentum Momentum for moving average
 * \param fix_gamma Fix gamma while training
 * \param use_global_stats Whether use global moving statistics instead of local
 * \param output_mean_var Output the mean and inverse std
 * \param axis Specify which shape axis the channel is specified
 * \param cudnn_off Do not select CUDNN operator, if available
 * \return new symbol
 */
inline Symbol BatchNorm(Symbol data,
                        Symbol gamma,
                        Symbol beta,
                        Symbol moving_mean,
                        Symbol moving_var,
                        double eps = 0.0010000000474974513,
                        mx_float momentum = 0.899999976,
                        bool fix_gamma = true,
                        bool use_global_stats = false,
                        bool output_mean_var = false,
                        int axis = 1,
                        bool cudnn_off = false) {
  return Operator("BatchNorm")
           .SetParam("eps", eps)
           .SetParam("momentum", momentum)
           .SetParam("fix_gamma", fix_gamma)
           .SetParam("use_global_stats", use_global_stats)
           .SetParam("output_mean_var", output_mean_var)
           .SetParam("axis", axis)
           .SetParam("cudnn_off", cudnn_off)
           .SetInput("data", data)
           .SetInput("gamma", gamma)
           .SetInput("beta", beta)
           .SetInput("moving_mean", moving_mean)
           .SetInput("moving_var", moving_var)
           .CreateSymbol();
}

/*!
 * \brief Connectionist Temporal Classification Loss.
 *
 *        .. note:: The existing alias ``contrib_CTCLoss`` is deprecated.
 *
 *        The shapes of the inputs and outputs:
 *
 *        - **data**: `(sequence_length, batch_size, alphabet_size)`
 *        - **label**: `(batch_size, label_sequence_length)`
 *        - **out**: `(batch_size)`
 *
 *        The `data` tensor consists of sequences of activation vectors (without applying
 *        with i-th channel in the last dimension corresponding to i-th label
 *        for i between 0 and alphabet_size-1 (i.e always 0-indexed).
 *        Alphabet size should include one additional value reserved for blank label.
 *        When `blank_label` is ``"first"``, the ``0``-th channel is be reserved for
 *        activation of blank label, or otherwise if it is "last",
 *        reserved for blank label.
 *
 *        ``label`` is an index matrix of integers. When `blank_label` is ``"first"``,
 *        the value 0 is then reserved for blank label, and should not be passed in this
 *        when `blank_label` is ``"last"``, the value `(alphabet_size-1)` is reserved for
 *
 *        If a sequence of labels is shorter than *label_sequence_length*, use the special
 *        padding value at the end of the sequence to conform it to the correct
 *        length. The padding value is `0` when `blank_label` is ``"first"``, and `-1`
 *
 *        For example, suppose the vocabulary is `[a, b, c]`, and in one batch we have
 *        'ba', 'cbb', and 'abac'. When `blank_label` is ``"first"``, we can index the
 *        `{'a': 1, 'b': 2, 'c': 3}`, and we reserve the 0-th channel for blank label in
 *        The resulting `label` tensor should be padded to be::
 *
 *        [[2, 1, 0, 0], [3, 2, 2, 0], [1, 2, 1, 3]]
 *
 *        When `blank_label` is ``"last"``, we can index the labels as
 *        `{'a': 0, 'b': 1, 'c': 2}`, and we reserve the channel index 3 for blank label
 *        The resulting `label` tensor should be padded to be::
 *
 *        [[1, 0, -1, -1], [2, 1, 1, -1], [0, 1, 0, 2]]
 *
 *        ``out`` is a list of CTC loss values, one per example in the batch.
 *
 *        See *Connectionist Temporal Classification: Labelling Unsegmented
 *        Sequence Data with Recurrent Neural Networks*, A. Graves *et al*. for more
 *        information on the definition and the algorithm.
 *
 *
 *
 *        Defined in src/operator/nn/ctc_loss.cc:L100
 * \param data Input ndarray
 * \param label Ground-truth labels for the loss.
 * \param data_lengths Lengths of data for each of the samples. Only required when
 * \param label_lengths Lengths of labels for each of the samples. Only required when
 * \param use_data_lengths Whether the data lenghts are decided by `data_lengths`. If
 * \param use_label_lengths Whether the label lenghts are decided by `label_lengths`, or
 *        derived from `padding_mask`. If false, the lengths are derived from the first
 *        occurrence of the value of `padding_mask`. The value of `padding_mask` is ``0``
 *        when first CTC label is reserved for blank, and ``-1`` when last label is
 * \param blank_label Set the label that is reserved for blank label.If "first", 0-th
 *        label is reserved, and label values for tokens in the vocabulary are between
 *        ``1`` and ``alphabet_size-1``, and the padding mask is ``-1``. If "last", last
 *        label value ``alphabet_size-1`` is reserved for blank label instead, and label
 *        values for tokens in the vocabulary are between ``0`` and ``alphabet_size-2``,
 * \return new symbol
 */
inline Symbol CTCLoss(Symbol data,
                      Symbol label,
                      Symbol data_lengths,
                      Symbol label_lengths,
                      bool use_data_lengths = false,
                      bool use_label_lengths = false,
                      CTCLossBlankLabel blank_label = CTCLossBlankLabel::kFirst) {
  static const char *CTCLossBlankLabelValues[] = {
    "first",
    "last"
  };
  return Operator("CTCLoss")
           .SetParam("use_data_lengths", use_data_lengths)
           .SetParam("use_label_lengths", use_label_lengths)
           .SetParam("blank_label", CTCLossBlankLabelValues[int(blank_label)])
           .SetInput("data", data)
           .SetInput("label", label)
           .SetInput("data_lengths", data_lengths)
           .SetInput("label_lengths", label_lengths)
           .CreateSymbol();
}

/*!
 * \brief Applies local response normalization to the input.
 *
 *        The local response normalization layer performs "lateral inhibition" by
 *        over local input regions.
 *
 *        If :math:`a_{x,y}^{i}` is the activity of a neuron computed by applying kernel
 *        :math:`(x, y)` and then applying the ReLU nonlinearity, the response-normalized
 *        activity :math:`b_{x,y}^{i}` is given by the expression:
 *
 *        .. math::
 *        b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \frac{\alpha}{n} \sum_{j=max(0,
 *
 *        where the sum runs over :math:`n` "adjacent" kernel maps at the same spatial
 *        number of kernels in the layer.
 *
 *
 *
 *        Defined in src/operator/nn/lrn.cc:L164
 * \param data Input data to LRN
 * \param nsize normalization window width in elements.
 * \param alpha The variance scaling parameter :math:`lpha` in the LRN expression.
 * \param beta The power parameter :math:`eta` in the LRN expression.
 * \param knorm The parameter :math:`k` in the LRN expression.
 * \return new symbol
 */
inline Symbol LRN(Symbol data,
                  uint32_t nsize,
                  mx_float alpha = 9.99999975e-05,
                  mx_float beta = 0.75,
                  mx_float knorm = 2) {
  return Operator("LRN")
           .SetParam("nsize", nsize)
           .SetParam("alpha", alpha)
           .SetParam("beta", beta)
           .SetParam("knorm", knorm)
           .SetInput("data", data)
           .CreateSymbol();
}

/*!
 * \brief Layer normalization.
 *
 *        Normalizes the channels of the input tensor by mean and variance, and applies a
 *        well as offset ``beta``.
 *
 *        Assume the input has more than one dimension and we normalize along axis 1.
 *        We first compute the mean and variance along this axis and then
 *        compute the normalized output, which has the same shape as input, as following:
 *
 *        .. math::
 *
 *        out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis) + \epsilon}} * gamma
 *
 *        Both ``gamma`` and ``beta`` are learnable parameters.
 *
 *        Unlike BatchNorm and InstanceNorm,  the *mean* and *var* are computed along the
 *
 *        Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 *        have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
 *        ``data_std``. Note that no gradient will be passed through these two outputs.
 *
 *        The parameter ``axis`` specifies which axis of the input shape denotes
 *        the 'channel' (separately normalized groups).  The default is -1, which sets
 *        axis to be the last item in the input shape.
 *
 *
 *
 *        Defined in src/operator/nn/layer_norm.cc:L155
 * \param data Input data to layer normalization
 * \param gamma gamma array
 * \param beta beta array
 * \param axis The axis to perform layer normalization. Usually, this should be be axis
 * \param eps An `epsilon` parameter to prevent division by 0.
 * \param output_mean_var Output the mean and std calculated along the given axis.
 * \return new symbol
 */
inline Symbol LayerNorm(Symbol data,
                        Symbol gamma,
                        Symbol beta,
                        int axis = -1,
                        mx_float eps = 9.99999975e-06,
                        bool output_mean_var = false) {
  return Operator("LayerNorm")
           .SetParam("axis", axis)
           .SetParam("eps", eps)