# llvm/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml

### AUTOGENERATED from core_named_ops.py
### To regenerate, run: bin/update_core_linalg_named_ops.sh
--- !LinalgOpConfig
# Named op `copy` (C++ class `CopyOp`): computes O = cast(U, I).
# The input uses type variable T1 and the output uses U; the conversion is
# selected by the `cast` type-function attribute (default: cast_signed).
metadata: !LinalgOpMetadata
  name: copy
  cpp_class_name: CopyOp
  doc: |-
    Copies the tensor elementwise.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  # NOTE(review): presumably asks the generator to declare a canonicalizer on
  # the C++ op class -- confirm against the YAML generator.
  defines:
  - hasCanonicalizer
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One `() -> ()` indexing map per tensor operand (I, O); the `cast`
  # attribute has no map. No iterator dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = cast(U, I), with the type function taken from the `cast` attribute.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: type
        attr_name: cast
        type_var: U
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `elemwise_unary` (C++ class `ElemwiseUnaryOp`):
# computes O = fun(cast(U, I)).
# Both the unary function (`fun`, default: exp) and the type conversion
# (`cast`, default: cast_signed) are selected through attributes.
metadata: !LinalgOpMetadata
  name: elemwise_unary
  cpp_class_name: ElemwiseUnaryOp
  doc: |-
    Applies the unary function fun elementwise.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: fun
    kind: unary_fn_attr
    default_fn: exp
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One `() -> ()` indexing map per tensor operand (I, O); the two function
  # attributes have no map. No iterator dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = fun(cast(U, I)): the input is cast to U first, then `fun` is applied.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        attr_name: fun
        operands:
        - !ScalarExpression
          scalar_fn:
            kind: type
            attr_name: cast
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Named op `exp` (C++ class `ExpOp`): computes O = exp(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: exp
  cpp_class_name: ExpOp
  doc: |-
    Applies exp(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = exp(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: exp
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `log` (C++ class `LogOp`): computes O = log(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: log
  cpp_class_name: LogOp
  doc: |-
    Applies log(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = log(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: log
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `abs` (C++ class `AbsOp`): computes O = abs(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: abs
  cpp_class_name: AbsOp
  doc: |-
    Applies abs(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = abs(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: abs
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `ceil` (C++ class `CeilOp`): computes O = ceil(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: ceil
  cpp_class_name: CeilOp
  doc: |-
    Applies ceil(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = ceil(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: ceil
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `floor` (C++ class `FloorOp`): computes O = floor(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: floor
  cpp_class_name: FloorOp
  doc: |-
    Applies floor(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = floor(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: floor
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `negf` (C++ class `NegFOp`): computes O = negf(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: negf
  cpp_class_name: NegFOp
  doc: |-
    Applies negf(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = negf(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: negf
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `reciprocal` (C++ class `ReciprocalOp`): computes O = reciprocal(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: reciprocal
  cpp_class_name: ReciprocalOp
  doc: |-
    Applies reciprocal(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = reciprocal(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: reciprocal
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `round` (C++ class `RoundOp`): computes O = round(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: round
  cpp_class_name: RoundOp
  doc: |-
    Applies round(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = round(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: round
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `sqrt` (C++ class `SqrtOp`): computes O = sqrt(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: sqrt
  cpp_class_name: SqrtOp
  doc: |-
    Applies sqrt(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = sqrt(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: sqrt
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `rsqrt` (C++ class `RsqrtOp`): computes O = rsqrt(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: rsqrt
  cpp_class_name: RsqrtOp
  doc: |-
    Applies rsqrt(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = rsqrt(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: rsqrt
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `square` (C++ class `SquareOp`): computes O = square(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: square
  cpp_class_name: SquareOp
  doc: |-
    Applies square(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = square(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: square
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `tanh` (C++ class `TanhOp`): computes O = tanh(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: tanh
  cpp_class_name: TanhOp
  doc: |-
    Applies tanh(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = tanh(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: tanh
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `erf` (C++ class `ErfOp`): computes O = erf(I).
# I and O share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: erf
  cpp_class_name: ErfOp
  doc: |-
    Applies erf(x) elementwise.

    No numeric casting is performed on the input operand.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (I, O); no iterator dimensions.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = erf(I)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: unary
        fn_name: erf
        operands:
        - !ScalarExpression
          scalar_arg: I
--- !LinalgOpConfig
# Named op `elemwise_binary` (C++ class `ElemwiseBinaryOp`):
# computes O = fun(cast(U, lhs), cast(U, rhs)).
# lhs, rhs and O use distinct type variables (T1, T2, U). The binary function
# (`fun`, default: add) and the type conversion (`cast`, default: cast_signed)
# are selected through attributes.
metadata: !LinalgOpMetadata
  name: elemwise_binary
  cpp_class_name: ElemwiseBinaryOp
  doc: |-
    Applies the binary function fun elementwise.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: fun
    kind: binary_fn_attr
    default_fn: add
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # One `() -> ()` indexing map per tensor operand (lhs, rhs, O); the two
  # function attributes have no map. No iterator dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = fun(cast(U, lhs), cast(U, rhs)): both inputs are cast to U before
  # `fun` is applied.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        attr_name: fun
        operands:
        - !ScalarExpression
          scalar_fn:
            kind: type
            attr_name: cast
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: lhs
        - !ScalarExpression
          scalar_fn:
            kind: type
            attr_name: cast
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: rhs
--- !LinalgOpConfig
# Named op `add` (C++ class `AddOp`): computes O = add(lhs, rhs).
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: add
  cpp_class_name: AddOp
  doc: |-
    Adds two tensors elementwise.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.add` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = add(lhs, rhs)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `sub` (C++ class `SubOp`): computes O = sub(lhs, rhs).
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: sub
  cpp_class_name: SubOp
  doc: |-
    Subtracts two tensors elementwise.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.sub` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = sub(lhs, rhs); operand order matters (lhs first, rhs second).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: sub
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `mul` (C++ class `MulOp`): computes O = mul(lhs, rhs).
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: mul
  cpp_class_name: MulOp
  doc: |-
    Multiplies two tensors elementwise.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.mul` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = mul(lhs, rhs)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: mul
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `div` (C++ class `DivOp`): computes O = div(lhs, rhs).
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: div
  cpp_class_name: DivOp
  doc: |-
    Divides the first tensor by the second tensor, elementwise.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.div` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = div(lhs, rhs); lhs is the dividend and rhs the divisor.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: div
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `div_unsigned` (C++ class `DivUnsignedOp`):
# computes O = div_unsigned(lhs, rhs).
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
# The doc example below names `linalg.div_unsigned` (it previously said
# `linalg.div`). This file is generated from core_named_ops.py, so the same
# wording fix must be made there to survive regeneration.
metadata: !LinalgOpMetadata
  name: div_unsigned
  cpp_class_name: DivUnsignedOp
  doc: |-
    Divides the first tensor by the second tensor, elementwise. For integer
    types, performs an unsigned division.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.div_unsigned` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = div_unsigned(lhs, rhs); lhs is the dividend and rhs the divisor.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: div_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `max` (C++ class `MaxOp`): computes O = max_signed(lhs, rhs).
# Note that the scalar function is `max_signed`, not `max`.
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: max
  cpp_class_name: MaxOp
  doc: |-
    Takes the max (signed) between two inputs, elementwise.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.max` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = max_signed(lhs, rhs)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `min` (C++ class `MinOp`): computes O = min_signed(lhs, rhs).
# Note that the scalar function is `min_signed`, not `min`.
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: min
  cpp_class_name: MinOp
  doc: |-
    Takes the min (signed) between two inputs, elementwise.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.min` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = min_signed(lhs, rhs)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_signed
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `powf` (C++ class `PowFOp`): computes O = powf(lhs, rhs).
# lhs, rhs and O all share the type variable T1, so the body contains no cast.
metadata: !LinalgOpMetadata
  name: powf
  cpp_class_name: PowFOp
  doc: |-
    Takes the powf(lhs, rhs) between two inputs, elementwise. For powf(arg, 2) use `linalg.square`.

    Only applies to floating point values.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.powf` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = powf(lhs, rhs); lhs is the first argument and rhs the second.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: powf
        operands:
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `select` (C++ class `SelectOp`): computes O = select(cond, lhs, rhs).
# `cond` has its own type variable (U); lhs, rhs and O share T1. This is the
# only op in this part of the file whose scalar function is `kind: ternary`.
metadata: !LinalgOpMetadata
  name: select
  cpp_class_name: SelectOp
  doc: |-
    Chooses one value based on a binary condition supplied as its first operand.

    The shapes and element types must be identical. The appropriate casts,
    broadcasts and reductions should be done previously to calling this op.

    This means reduction/broadcast/element cast semantics is explicit. Further
    passes can take that into account when lowering this code. For example,
    a `linalg.broadcast` + `linalg.select` sequence can be lowered to a
    `linalg.generic` with different affine maps for the two operands.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: cond
    kind: input_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T1
    shape_map: affine_map<() -> ()>
  # One `() -> ()` indexing map per operand (cond, lhs, rhs, O); no iterator
  # dimensions are declared.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = select(cond, lhs, rhs); the condition is the first operand.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: ternary
        fn_name: select
        operands:
        - !ScalarExpression
          scalar_arg: cond
        - !ScalarExpression
          scalar_arg: lhs
        - !ScalarExpression
          scalar_arg: rhs
--- !LinalgOpConfig
# Named op `matmul` (C++ class `MatmulOp`):
#   C[d0, d1] = C[d0, d1] + cast(U, A[d0, d2]) * cast(U, B[d2, d1])
# A, B and C use type variables T1, T2 and U. Both inputs are converted to U
# through the `cast` type-function attribute (default: cast_signed) before the
# multiply. The op lists LinalgContractionOpInterface under `implements`.
metadata: !LinalgOpMetadata
  name: matmul
  cpp_class_name: MatmulOp
  doc: |-
    Performs a matrix multiplication of two 2D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shapes are expressed over three symbols: A is (s0, s1), B is (s1, s2) and
  # C is (s0, s2).
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # Indexing maps for A, B and C over loop dims (d0, d1, d2):
  # A -> (d0, d2), B -> (d2, d1), C -> (d0, d1).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # d0 and d1 are parallel; d2 (absent from C's map) is the reduction dim.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # C = add(C, mul(cast(U, A), cast(U, B)))
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# Named op `quantized_matmul` (C++ class `QuantizedMatmulOp`):
#   C[d0, d1] = C[d0, d1]
#     + (cast_signed(U, A[d0, d2]) - cast_signed(U, AZp))
#     * (cast_signed(U, B[d2, d1]) - cast_signed(U, BZp))
# AZp and BZp are I32 scalar operands. Unlike `matmul`, the casts are fixed to
# `cast_signed` (fn_name) rather than chosen through an attribute, and no
# `implements` list is declared.
metadata: !LinalgOpMetadata
  name: quantized_matmul
  cpp_class_name: QuantizedMatmulOp
  doc: |-
    Performs a matrix multiplication of two 2D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. The quantized variant
    includes zero-point adjustments for the left and right operands of the
    matmul.
structured_op: !LinalgStructuredOpConfig
  # Shapes are expressed over three symbols: A is (s0, s1), B is (s1, s2) and
  # C is (s0, s2). The scalar operands AZp and BZp carry no shape_map.
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s1, s2)>
  - !LinalgOperandDefConfig
    name: AZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: BZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  # Indexing maps in args order (A, B, AZp, BZp, C) over loop dims
  # (d0, d1, d2): A -> (d0, d2), B -> (d2, d1), the two scalars -> (),
  # C -> (d0, d1).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> ()>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # d0 and d1 are parallel; d2 (absent from C's map) is the reduction dim.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # C = add(C, mul(sub(cast(A), cast(AZp)), sub(cast(B), cast(BZp)))),
  # where every cast is cast_signed to U.
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: A
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: AZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: B
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: BZp
--- !LinalgOpConfig
# matmul_transpose_a: 2-D contraction in which the lhs operand A is read with
# its two dimensions swapped (A[d2, d0]) relative to a plain matmul.
metadata: !LinalgOpMetadata
  name: matmul_transpose_a
  cpp_class_name: MatmulTransposeAOp
  doc: |-
    Performs a matrix multiplication of two 2D inputs with lhs operand
    transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  # NOTE(review): the shape symbols below do not pair up consistently with the
  # indexing maps (s1 lines up with d0 via A but with d1 via C; s2 lines up
  # with d1 via B but with d0 via C) - confirm against core_named_ops.py.
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
  # Type-conversion function attribute referenced by the body (attr_name: cast);
  # defaults to cast_signed.
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # Accesses, in tensor-operand order: A[d2, d0], B[d2, d1], C[d0, d1].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d0)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # d0 and d1 are parallel; d2 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1] += cast(U, A[d2, d0]) * cast(U, B[d2, d1]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# matmul_transpose_b: 2-D contraction in which the rhs operand B is read with
# its two dimensions swapped (B[d1, d2]) relative to a plain matmul.
metadata: !LinalgOpMetadata
  name: matmul_transpose_b
  cpp_class_name: MatmulTransposeBOp
  doc: |-
    Performs a matrix multiplication of two 2D inputs with rhs operand
    transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  # Type-conversion function attribute referenced by the body (attr_name: cast);
  # defaults to cast_signed.
  - !LinalgOperandDefConfig
    name: cast
    kind: type_fn_attr
    default_fn: cast_signed
  # Accesses, in tensor-operand order: A[d0, d2], B[d1, d2], C[d0, d1].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d1, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # d0 and d1 are parallel; d2 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1] += cast(U, A[d0, d2]) * cast(U, B[d1, d2]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                attr_name: cast
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# mmt4d: contraction over 4-D operands using six iteration dimensions, two of
# which (d2 and d5) are reductions.
metadata: !LinalgOpMetadata
  name: mmt4d
  cpp_class_name: Mmt4DOp
  doc: |-
    Performs a matrix-matrix-transpose multiplication of two 4D inputs.

    Differences from linalg.matmul:
    * The right hand side is transposed, whence the 't' in 'mmt'.
    * The input and output tensors have a 4D shape instead of a 2D shape. They
      are interpreted as 2D matrices with one level of 2D tile subdivision,
      whence the 2+2=4 dimensions. The inner tile dimensions are identified with
      '0' suffixes below, for instance the LHS matrix shape (M, K, M0, K0) reads
      as: MxK tiles, each of shape M0xK0.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: LhsType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2, s3)>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: RhsType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s5, s3)>
  - !LinalgOperandDefConfig
    name: accum
    kind: output_tensor
    type_var: AccumType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s4, s2, s5)>
  # Accesses, in operand order: lhs[d0, d2, d3, d5], rhs[d1, d2, d4, d5],
  # accum[d0, d1, d3, d4]. The long maps are wrapped plain scalars.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d3,
      d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d2, d4,
      d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d3,
      d4)>
  # d0, d1, d3, d4 are parallel; d2 and d5 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - reduction
  - parallel
  - parallel
  - reduction
  # Body: accum[d0, d1, d3, d4] += cast_signed(AccumType, lhs[d0, d2, d3, d5])
  #                              * cast_signed(AccumType, rhs[d1, d2, d4, d5]).
  # The cast function is fixed here (fn_name), not selected by an attribute.
  assignments:
  - !ScalarAssign
    arg: accum
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: accum
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: AccumType
                operands:
                - !ScalarExpression
                  scalar_arg: lhs
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: AccumType
                operands:
                - !ScalarExpression
                  scalar_arg: rhs
--- !LinalgOpConfig
# batch_mmt4d: contraction over 5-D operands that share a leading dimension
# (d0); seven iteration dimensions, two of which (d3 and d6) are reductions.
metadata: !LinalgOpMetadata
  name: batch_mmt4d
  cpp_class_name: BatchMmt4DOp
  doc: |-
    Performs a batched matrix-matrix-transpose multiplication of two
    batched-4D (5D) inputs.

    Besides the outermost batch dimension, which has the same semantics as in
    linalg.batch_matmul, the differences from linalg.batch_matmul in the
    non-batch dimensions are the same as linalg.mmt4d vs. linalg.matmul. See the
    description of linalg.mmt4d.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: lhs
    kind: input_tensor
    type_var: LhsType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s2, s3, s4)>
  - !LinalgOperandDefConfig
    name: rhs
    kind: input_tensor
    type_var: RhsType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s5, s2, s6, s4)>
  - !LinalgOperandDefConfig
    name: accum
    kind: output_tensor
    type_var: AccumType
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s5, s3, s6)>
  # Accesses, in operand order: lhs[d0, d1, d3, d4, d6], rhs[d0, d2, d3, d5, d6],
  # accum[d0, d1, d2, d4, d5]. The long maps are wrapped plain scalars.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6] -> (d0,
      d1, d3, d4, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6] -> (d0,
      d2, d3, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6] -> (d0,
      d1, d2, d4, d5)>
  # d0, d1, d2, d4, d5 are parallel; d3 and d6 are the reduction dimensions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  - parallel
  - parallel
  - reduction
  # Body: accum[d0, d1, d2, d4, d5] +=
  #   cast_signed(AccumType, lhs[d0, d1, d3, d4, d6])
  #   * cast_signed(AccumType, rhs[d0, d2, d3, d5, d6]).
  assignments:
  - !ScalarAssign
    arg: accum
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: accum
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: AccumType
                operands:
                - !ScalarExpression
                  scalar_arg: lhs
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: AccumType
                operands:
                - !ScalarExpression
                  scalar_arg: rhs
--- !LinalgOpConfig
# batch_matmul: 3-D contraction; d0 is shared by all three operands and d3 is
# the reduction dimension.
metadata: !LinalgOpMetadata
  name: batch_matmul
  cpp_class_name: BatchMatmulOp
  doc: |-
    Performs a batched matrix multiplication of two 3D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
  # Accesses, in operand order: A[d0, d1, d3], B[d0, d3, d2], C[d0, d1, d2].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
  # d0, d1, d2 are parallel; d3 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1, d2] += cast_signed(U, A[d0, d1, d3])
  #                      * cast_signed(U, B[d0, d3, d2]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# batch_matmul_transpose_a: 3-D contraction in which A is read with its two
# trailing dimensions swapped (A[d0, d3, d1]).
metadata: !LinalgOpMetadata
  name: batch_matmul_transpose_a
  cpp_class_name: BatchMatmulTransposeAOp
  doc: |-
    Performs a batched matrix multiplication of two 3D inputs where lhs operand
    has its non-batch dimensions transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
  # Accesses, in operand order: A[d0, d3, d1], B[d0, d3, d2], C[d0, d1, d2].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d1)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
  # d0, d1, d2 are parallel; d3 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1, d2] += cast_signed(U, A[d0, d3, d1])
  #                      * cast_signed(U, B[d0, d3, d2]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# batch_matmul_transpose_b: 3-D contraction in which B is read with its two
# trailing dimensions swapped (B[d0, d2, d3]).
metadata: !LinalgOpMetadata
  name: batch_matmul_transpose_b
  cpp_class_name: BatchMatmulTransposeBOp
  doc: |-
    Performs a batched matrix multiplication of two 3D inputs where rhs operand
    has its non-batch dimensions transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
  # Accesses, in operand order: A[d0, d1, d3], B[d0, d2, d3], C[d0, d1, d2].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d2, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
  # d0, d1, d2 are parallel; d3 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1, d2] += cast_signed(U, A[d0, d1, d3])
  #                      * cast_signed(U, B[d0, d2, d3]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# quantized_batch_matmul: batch_matmul-shaped contraction that subtracts the
# scalar operands AZp / BZp from A / B before the multiply. This config lists
# no `implements:` entry.
metadata: !LinalgOpMetadata
  name: quantized_batch_matmul
  cpp_class_name: QuantizedBatchMatmulOp
  doc: |-
    Performs a batched matrix multiplication of two 3D inputs.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. The quantized variant
    includes zero-point adjustments for the left and right operands of the
    matmul.
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
  # Scalar I32 operand subtracted from A in the body; it has no shape_map.
  - !LinalgOperandDefConfig
    name: AZp
    kind: scalar
    type_var: I32
  # Scalar I32 operand subtracted from B in the body; it has no shape_map.
  - !LinalgOperandDefConfig
    name: BZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
  # Accesses, in operand order: A[d0, d1, d3], B[d0, d3, d2], AZp[], BZp[],
  # C[d0, d1, d2]. The two scalars use the empty result map `-> ()`.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> ()>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
  # d0, d1, d2 are parallel; d3 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1, d2] +=
  #   (cast_signed(U, A[d0, d1, d3]) - cast_signed(U, AZp))
  #   * (cast_signed(U, B[d0, d3, d2]) - cast_signed(U, BZp)).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: A
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: AZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: B
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: BZp
--- !LinalgOpConfig
# batch_reduce_matmul: contraction of two 3-D inputs into a 2-D output; both
# the leading dimension d0 and d3 are reductions.
metadata: !LinalgOpMetadata
  name: batch_reduce_matmul
  cpp_class_name: BatchReduceMatmulOp
  doc: |-
    Performs a batch-reduce matrix multiplication of two 3D inputs.
    The partial multiplication results are reduced into a 2D output.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s1, s3)>
  # Accesses, in operand order: A[d0, d1, d3], B[d0, d3, d2], C[d1, d2].
  # The output map omits d0, so contributions from every d0 land in the same C
  # element.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d1, d2)>
  # d0 and d3 are reductions; d1 and d2 are parallel.
  iterator_types:
  - reduction
  - parallel
  - parallel
  - reduction
  # Body: C[d1, d2] += cast_signed(U, A[d0, d1, d3])
  #                  * cast_signed(U, B[d0, d3, d2]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# matvec: 2-D matrix A times 1-D vector y, accumulated into 1-D output x.
# Note the naming: y is the input vector and x is the output.
metadata: !LinalgOpMetadata
  name: matvec
  cpp_class_name: MatvecOp
  doc: |-
    Performs a matrix-vector multiplication.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: y
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1] -> (s1)>
  - !LinalgOperandDefConfig
    name: x
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1] -> (s0)>
  # Accesses, in operand order: A[d0, d1], y[d1], x[d0].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d0)>
  # d0 is parallel; d1 is the reduction dimension.
  iterator_types:
  - parallel
  - reduction
  # Body: x[d0] += cast_signed(U, A[d0, d1]) * cast_signed(U, y[d1]).
  assignments:
  - !ScalarAssign
    arg: x
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: x
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: y
--- !LinalgOpConfig
# vecmat: 1-D vector y times 2-D matrix A, accumulated into 1-D output x.
# Here d0 indexes the output and the second dimension of A.
metadata: !LinalgOpMetadata
  name: vecmat
  cpp_class_name: VecmatOp
  doc: |-
    Performs a vector-matrix multiplication.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: y
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1] -> (s0)>
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: x
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1] -> (s1)>
  # Accesses, in operand order: y[d1], A[d1, d0], x[d0].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> (d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d1, d0)>
    - affine_map<(d0, d1)[s0, s1] -> (d0)>
  # d0 is parallel; d1 is the reduction dimension.
  iterator_types:
  - parallel
  - reduction
  # Body: x[d0] += cast_signed(U, y[d1]) * cast_signed(U, A[d1, d0]).
  assignments:
  - !ScalarAssign
    arg: x
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: x
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: y
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
--- !LinalgOpConfig
# batch_matvec: 3-D operand A times 2-D operand B into 2-D output C; d0 is
# shared by all three operands and d2 is the reduction dimension.
metadata: !LinalgOpMetadata
  name: batch_matvec
  cpp_class_name: BatchMatvecOp
  doc: |-
    Performs a batched matrix-vector multiplication.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  # Accesses, in operand order: A[d0, d1, d2], B[d0, d2], C[d0, d1].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # d0 and d1 are parallel; d2 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1] += cast_signed(U, A[d0, d1, d2]) * cast_signed(U, B[d0, d2]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# batch_vecmat: 2-D operand A (a vector per d0) times 3-D operand B (a matrix
# per d0) into 2-D output C; d2 is the reduction dimension.
metadata: !LinalgOpMetadata
  name: batch_vecmat
  cpp_class_name: BatchVecmatOp
  doc: |-
    Performs a batched vector-matrix multiplication.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
  # Accesses, in operand order: A[d0, d2], B[d0, d2, d1], C[d0, d1].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2, d1)>
    - affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
  # d0 and d1 are parallel; d2 is the reduction dimension.
  iterator_types:
  - parallel
  - parallel
  - reduction
  # Body: C[d0, d1] += cast_signed(U, A[d0, d2]) * cast_signed(U, B[d0, d2, d1]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# dot: reduces the elementwise product of two 1-D inputs into a rank-0 output.
metadata: !LinalgOpMetadata
  name: dot
  cpp_class_name: DotOp
  doc: |-
    Performs a dot product of two vectors to a scalar result.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: A
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0] -> (s0)>
  - !LinalgOperandDefConfig
    name: B
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0] -> (s0)>
  # The output has an empty shape (rank 0).
  - !LinalgOperandDefConfig
    name: C
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0] -> ()>
  # Accesses, in operand order: A[d0], B[d0], C[].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0)[s0] -> (d0)>
    - affine_map<(d0)[s0] -> (d0)>
    - affine_map<(d0)[s0] -> ()>
  # The single iteration dimension d0 is a reduction.
  iterator_types:
  - reduction
  # Body: C[] += cast_signed(U, A[d0]) * cast_signed(U, B[d0]).
  assignments:
  - !ScalarAssign
    arg: C
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: C
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: A
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: B
--- !LinalgOpConfig
# conv_1d: 1-D convolution of input I with kernel K into output O; the input
# is read at the sum of the output index and the kernel index (d0 + d1).
metadata: !LinalgOpMetadata
  name: conv_1d
  cpp_class_name: Conv1DOp
  doc: |-
    Performs 1-D convolution with no channels.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  # The input's shape is expressed as the sum of the output symbol (s0) and the
  # kernel symbol (s1).
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1] -> (s0 + s1)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1] -> (s1)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1] -> (s0)>
  # Accesses, in operand order: I[d0 + d1], K[d1], O[d0].
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> (d0 + d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d1)>
    - affine_map<(d0, d1)[s0, s1] -> (d0)>
  # d0 is parallel; d1 is the reduction dimension.
  iterator_types:
  - parallel
  - reduction
  # Body: O[d0] += cast_signed(U, I[d0 + d1]) * cast_signed(U, K[d1]).
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d: rank-2 convolution. Reading the maps and the assignment below, each
# step computes
#   O[d0, d1] += cast_signed<U>(I[d0 + d2, d1 + d3]) * cast_signed<U>(K[d2, d3]).
metadata: !LinalgOpMetadata
  name: conv_2d
  cpp_class_name: Conv2DOp
  doc: |-
    Performs 2-D convolution with no channels.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: (s0, s2) is the shape of O and (s1, s3) the shape of K; I has
  # shape (s0 + s1, s2 + s3).
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0 + s1, s2 + s3)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s1, s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2)>
  # Loop dims: d0, d1 index O (parallel); d2, d3 index K (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 + d2, d1 + d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d2, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1)>
  iterator_types:
  - parallel
  - parallel
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_3d: rank-3 convolution. Reading the maps and the assignment below, each
# step computes
#   O[d0, d1, d2] += cast_signed<U>(I[d0 + d3, d1 + d4, d2 + d5])
#                    * cast_signed<U>(K[d3, d4, d5]).
metadata: !LinalgOpMetadata
  name: conv_3d
  cpp_class_name: Conv3DOp
  doc: |-
    Performs 3-D convolution with no channels.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols: (s0, s2, s4) is the shape of O and (s1, s3, s5) the shape of
  # K; I has shape (s0 + s1, s2 + s3, s4 + s5).
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0 + s1, s2 + s3, s4 + s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s1, s3, s5)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s2, s4)>
  # Loop dims: d0..d2 index O (parallel); d3..d5 index K (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0 + d3, d1
      + d4, d2 + d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d3, d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_1d_nwc_wcf: strided/dilated 1-D convolution. Reading the maps and the
# assignment below, each step computes
#   O[d0, d1, d2] += cast_signed<U>(I[d0, d1 * s2 + d3 * s4, d4])
#                    * cast_signed<U>(K[d3, d4, d2]).
metadata: !LinalgOpMetadata
  name: conv_1d_nwc_wcf
  cpp_class_name: Conv1DNwcWcfOp
  doc: |-
    Performs 1-D convolution.

    Layout:
      * Input: NWC.
      * Kernel: WCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s5 = C, s6 = F
  #   W: s1 = output size, s2 = stride, s3 = kernel size, s4 = dilation
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
      s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s6)>
  # strides binds s2 and dilations binds s4; both default to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
    default_indices:
    - 1
  # Loop dims: d0 = N, d1 = output W, d2 = F (parallel);
  # d3 = kernel W, d4 = C (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
      + d3 * s4, d4)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d3, d4, d2)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_1d_ncw_fcw: strided/dilated 1-D convolution. Reading the maps and the
# assignment below, each step computes
#   O[d0, d1, d2] += cast_signed<U>(I[d0, d3, d2 * s3 + d4 * s5])
#                    * cast_signed<U>(K[d1, d3, d4]).
metadata: !LinalgOpMetadata
  name: conv_1d_ncw_fcw
  cpp_class_name: Conv1DNcwFcwOp
  doc: |-
    Performs 1-D convolution.

    Layout:
      * Input: NCW.
      * Kernel: FCW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s1 = C, s6 = F
  #   W: s2 = output size, s3 = stride, s4 = kernel size, s5 = dilation
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s2 * s3 + s4
      * s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s6, s1, s4)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s6, s2)>
  # strides binds s3 and dilations binds s5; both default to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s5)>
    default_indices:
    - 1
  # Loop dims: d0 = N, d1 = F, d2 = output W (parallel);
  # d3 = C, d4 = kernel W (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d3, d2 *
      s3 + d4 * s5)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d1, d3, d4)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_nhwc_hwcf: strided/dilated 2-D convolution. Reading the maps and the
# assignment below, each step computes
#   O[d0, d1, d2, d3] +=
#       cast_signed<U>(I[d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6])
#       * cast_signed<U>(K[d4, d5, d6, d3]).
metadata: !LinalgOpMetadata
  name: conv_2d_nhwc_hwcf
  cpp_class_name: Conv2DNhwcHwcfOp
  doc: |-
    Performs 2-D convolution.

    Layout:
      * Input: NHWC.
      * Kernel: HWCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s9 = C, s10 = F
  #   H: s1 = output size, s2 = stride, s3 = kernel size, s4 = dilation
  #   W: s5 = output size, s6 = stride, s7 = kernel size, s8 = dilation
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s10)>
  # strides binds (s2, s6) and dilations binds (s4, s8); both default to (1, 1).
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # Loop dims: d0 = N, d1 = output H, d2 = output W, d3 = F (parallel);
  # d4 = kernel H, d5 = kernel W, d6 = C (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d4, d5, d6, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_nhwc_fhwc: strided/dilated 2-D convolution. Reading the maps and the
# assignment below, each step computes
#   O[d0, d1, d2, d3] +=
#       cast_signed<U>(I[d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6])
#       * cast_signed<U>(K[d3, d4, d5, d6]).
metadata: !LinalgOpMetadata
  name: conv_2d_nhwc_fhwc
  cpp_class_name: Conv2DNhwcFhwcOp
  doc: |-
    Performs 2-D convolution.

    Layout:
      * Input: NHWC.
      * Kernel: FHWC.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s9 = C, s10 = F
  #   H: s1 = output size, s2 = stride, s3 = kernel size, s4 = dilation
  #   W: s5 = output size, s6 = stride, s7 = kernel size, s8 = dilation
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
      s3, s7, s9)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s10)>
  # strides binds (s2, s6) and dilations binds (s4, s8); both default to (1, 1).
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # Loop dims: d0 = N, d1 = output H, d2 = output W, d3 = F (parallel);
  # d4 = kernel H, d5 = kernel W, d6 = C (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d3, d4, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_nhwc_hwcf_q: strided/dilated 2-D convolution with scalar zero points
# subtracted from both operands. Reading the maps and the assignment below,
# each step computes
#   O[d0, d1, d2, d3] +=
#       (cast_signed<U>(I[d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6])
#        - cast_signed<U>(IZp))
#       * (cast_signed<U>(K[d4, d5, d6, d3]) - cast_signed<U>(KZp)).
metadata: !LinalgOpMetadata
  name: conv_2d_nhwc_hwcf_q
  cpp_class_name: Conv2DNhwcHwcfQOp
  doc: |-
    Performs 2-D convolution with zero point offsets.

    Layout:
      * Input: NHWC.
      * Kernel: HWCF.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s9 = C, s10 = F
  #   H: s1 = output size, s2 = stride, s3 = kernel size, s4 = dilation
  #   W: s5 = output size, s6 = stride, s7 = kernel size, s8 = dilation
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  # IZp / KZp: I32 scalar zero points for I and K respectively.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s10)>
  # strides binds (s2, s6) and dilations binds (s4, s8); both default to (1, 1).
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # Loop dims: d0 = N, d1 = output H, d2 = output W, d3 = F (parallel);
  # d4 = kernel H, d5 = kernel W, d6 = C (reduction).
  # Maps are listed in operand order: I, K, IZp, KZp, O; the two scalars use
  # the empty result list `-> ()`.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d4, d5, d6, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
  #               * (cast_signed<U>(K) - cast_signed<U>(KZp))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# conv_2d_nhwc_fhwc_q: strided/dilated 2-D convolution with scalar zero points
# subtracted from both operands. Reading the maps and the assignment below,
# each step computes
#   O[d0, d1, d2, d3] +=
#       (cast_signed<U>(I[d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6])
#        - cast_signed<U>(IZp))
#       * (cast_signed<U>(K[d3, d4, d5, d6]) - cast_signed<U>(KZp)).
metadata: !LinalgOpMetadata
  name: conv_2d_nhwc_fhwc_q
  cpp_class_name: Conv2DNhwcFhwcQOp
  doc: |-
    Performs 2-D convolution with zero point offsets.

    Layout:
      * Input: NHWC.
      * Kernel: FHWC.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s9 = C, s10 = F
  #   H: s1 = output size, s2 = stride, s3 = kernel size, s4 = dilation
  #   W: s5 = output size, s6 = stride, s7 = kernel size, s8 = dilation
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
      s3, s7, s9)>
  # IZp / KZp: I32 scalar zero points for I and K respectively.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s10)>
  # strides binds (s2, s6) and dilations binds (s4, s8); both default to (1, 1).
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # Loop dims: d0 = N, d1 = output H, d2 = output W, d3 = F (parallel);
  # d4 = kernel H, d5 = kernel W, d6 = C (reduction).
  # Maps are listed in operand order: I, K, IZp, KZp, O; the two scalars use
  # the empty result list `-> ()`.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d3, d4, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
  #               * (cast_signed<U>(K) - cast_signed<U>(KZp))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# conv_2d_nchw_fchw: strided/dilated 2-D convolution. Reading the maps and the
# assignment below, each step computes
#   O[d0, d1, d2, d3] +=
#       cast_signed<U>(I[d0, d4, d2 * s3 + d5 * s5, d3 * s7 + d6 * s9])
#       * cast_signed<U>(K[d1, d4, d5, d6]).
metadata: !LinalgOpMetadata
  name: conv_2d_nchw_fchw
  cpp_class_name: Conv2DNchwFchwOp
  doc: |-
    Performs 2-D convolution.

    Layout:
      * Input: NCHW.
      * Kernel: FCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s1 = C, s10 = F
  #   H: s2 = output size, s3 = stride, s4 = kernel size, s5 = dilation
  #   W: s6 = output size, s7 = stride, s8 = kernel size, s9 = dilation
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
      s1, s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s10, s2, s6)>
  # strides binds (s3, s7) and dilations binds (s5, s9); both default to (1, 1).
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s3, s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s5, s9)>
    default_indices:
    - 1
    - 1
  # Loop dims: d0 = N, d1 = F, d2 = output H, d3 = output W (parallel);
  # d4 = C, d5 = kernel H, d6 = kernel W (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d4, d2 * s3 + d5 * s5, d3 * s7 + d6 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d1, d4, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_ngchw_fgchw: strided/dilated grouped 2-D convolution. Reading the
# maps and the assignment below, each step computes
#   O[d0, d1, d2, d3, d4] +=
#       cast_signed<U>(I[d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10])
#       * cast_signed<U>(K[d2, d1, d5, d6, d7]).
metadata: !LinalgOpMetadata
  name: conv_2d_ngchw_fgchw
  cpp_class_name: Conv2DNgchwFgchwOp
  doc: |-
    Performs 2-D grouped convolution.

    Layout:
      * Input: NGCHW.
      * Kernel: FGCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s1 = G, s2 = C, s11 = F
  #   H: s3 = output size, s4 = stride, s5 = kernel size, s6 = dilation
  #   W: s7 = output size, s8 = stride, s9 = kernel size, s10 = dilation
  # O is shaped (N, G, F, output H, output W).
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s11, s1, s2, s5, s9)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s11, s3, s7)>
  # strides binds (s4, s8) and dilations binds (s6, s10); both default to
  # (1, 1).
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s4, s8)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s6, s10)>
    default_indices:
    - 1
    - 1
  # Loop dims: d0 = N, d1 = G, d2 = F, d3 = output H, d4 = output W (parallel);
  # d5 = C, d6 = kernel H, d7 = kernel W (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d2, d1, d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# conv_2d_ngchw_gfchw: strided/dilated grouped 2-D convolution. Reading the
# maps and the assignment below, each step computes
#   O[d0, d1, d2, d3, d4] +=
#       cast_signed<U>(I[d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10])
#       * cast_signed<U>(K[d1, d2, d5, d6, d7]).
metadata: !LinalgOpMetadata
  name: conv_2d_ngchw_gfchw
  cpp_class_name: Conv2DNgchwGfchwOp
  doc: |-
    Performs 2-D grouped convolution.

    Layout:
      * Input: NGCHW.
      * Kernel: GFCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Shape symbols, read off the shape maps and index attribute maps below:
  #   s0 = N, s1 = G, s2 = C, s11 = F
  #   H: s3 = output size, s4 = stride, s5 = kernel size, s6 = dilation
  #   W: s7 = output size, s8 = stride, s9 = kernel size, s10 = dilation
  # O is shaped (N, G, F, output H, output W).
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s1, s11, s2, s5, s9)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s11, s3, s7)>
  # strides binds (s4, s8) and dilations binds (s6, s10); both default to
  # (1, 1).
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s4, s8)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s6, s10)>
    default_indices:
    - 1
    - 1
  # Loop dims: d0 = N, d1 = G, d2 = F, d3 = output H, d4 = output W (parallel);
  # d5 = C, d6 = kernel H, d7 = kernel W (reduction).
  # Maps are listed in operand order: I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d1, d2, d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: conv_2d_ngchw_gfchw_q
  cpp_class_name: Conv2DNgchwGfchwQOp
  doc: |-
    Performs 2-D grouped convolution with zero-point offsets.

    Layout:
      * Input: NGCHW.
      * Kernel: GFCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0, s1, s2 are the three leading
  # dims of I (N, G, C per the layout in `doc`); s3 and s7 are the two trailing
  # dims of O; s5 and s9 are the two trailing dims of K; s11 is the second dim
  # of K and third dim of O. s4/s8 are the strides, s6/s10 the dilations.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s2, s3 * s4 + s5 * s6, s7 * s8 + s9 * s10)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s1, s11, s2, s5, s9)>
  # IZp / KZp: scalar I32 zero points subtracted from I and K in the body.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
      (s0, s1, s11, s3, s7)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s4, s8)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
      -> (s6, s10)>
    default_indices:
    - 1
    - 1
  # Five maps, matching the operands I, K, IZp, KZp, O in that order; the two
  # scalar zero points use the empty result list `()`. d5 indexes the third dim
  # of both I and K, and d6/d7 walk the kernel window.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d5, d3 * s4 + d6 * s6, d4 * s8 + d7 * s10)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d1, d2, d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11] -> (d0, d1, d2, d3, d4)>
  # d0-d4 (the indices of O) are parallel; d5-d7 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # Body: O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
  #               * (cast_signed<U>(K) - cast_signed<U>(KZp))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: conv_3d_ndhwc_dhwcf
  cpp_class_name: Conv3DNdhwcDhwcfOp
  doc: |-
    Performs 3-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1/s5/s9 are the three middle dims of O; s3/s7/s11 are the three leading
  # dims of K; s13 is the last dim of I and fourth dim of K; s14 is the last
  # dim of K and O. s2/s6/s10 are the strides, s4/s8/s12 the dilations.
  # Presumably the layouts are NDHWC / DHWCF as the op name suggests.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
      s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s3, s7, s11, s13, s14)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1, s5, s9, s14)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # Three maps, matching the operands I, K, O in that order. d5/d6/d7 walk the
  # kernel window and d8 indexes the dim shared by I (last) and K (fourth).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
      * s8, d3 * s10 + d7 * s12, d8)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d4)>
  # d0-d4 (the indices of O) are parallel; d5-d8 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: conv_3d_ndhwc_dhwcf_q
  cpp_class_name: Conv3DNdhwcDhwcfQOp
  doc: |-
    Performs 3-D convolution with zero point offsets.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1/s5/s9 are the three middle dims of O; s3/s7/s11 are the three leading
  # dims of K; s13 is the last dim of I and fourth dim of K; s14 is the last
  # dim of K and O. s2/s6/s10 are the strides, s4/s8/s12 the dilations.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
      s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s3, s7, s11, s13, s14)>
  # IZp / KZp: scalar I32 zero points subtracted from I and K in the body.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1, s5, s9, s14)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # Five maps, matching the operands I, K, IZp, KZp, O in that order; the two
  # scalar zero points use the empty result list `()`. d5/d6/d7 walk the kernel
  # window and d8 indexes the dim shared by I (last) and K (fourth).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
      * s8, d3 * s10 + d7 * s12, d8)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d4)>
  # d0-d4 (the indices of O) are parallel; d5-d8 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - reduction
  # Body: O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
  #               * (cast_signed<U>(K) - cast_signed<U>(KZp))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: conv_3d_ncdhw_fcdhw
  cpp_class_name: Conv3DNcdhwFcdhwOp
  doc: |-
    Performs 3-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1 is the second dim of I and K; s2/s6/s10 are the three trailing dims of
  # O; s4/s8/s12 are the three trailing dims of K; s14 is the leading dim of K
  # and second dim of O. s3/s7/s11 are the strides, s5/s9/s13 the dilations.
  # Presumably the layouts are NCDHW / FCDHW as the op name suggests.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9, s10 * s11 + s12
      * s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s14, s1, s4, s8, s12)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s14, s2, s6, s10)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s3, s7, s11)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s5, s9, s13)>
    default_indices:
    - 1
    - 1
    - 1
  # Three maps, matching the operands I, K, O in that order. d5/d6/d7 walk the
  # kernel window and d8 indexes the second dim of both I and K.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d8, d1 * s3 + d5 * s5, d2 * s7
      + d6 * s9, d3 * s11 + d7 * s13)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d4, d8, d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d4, d1, d2, d3)>
  # d0-d4 (all used to index O) are parallel; d5-d8 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: depthwise_conv_1d_nwc_wc
  cpp_class_name: DepthwiseConv1DNwcWcOp
  doc: |-
    Performs depth-wise 1-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1 is the middle dim of O; s3 is the leading dim of K; s5 is the last dim
  # of I, K and O. s2 is the stride and s4 the dilation.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3, s5)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # Three maps, matching the operands I, K, O in that order. d3 walks the kernel
  # window; d2 indexes the last dim of I, K and O alike.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3, d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 (the indices of O) are parallel; d3 is the only reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: depthwise_conv_1d_ncw_cw
  cpp_class_name: DepthwiseConv1DNcwCwOp
  doc: |-
    Performs depth-wise 1-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1 is the second dim of I and O and the leading dim of K; s2 is the last
  # dim of O; s4 is the last dim of K. s3 is the stride and s5 the dilation.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s1, s4)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
    default_indices:
    - 1
  # Three maps, matching the operands I, K, O in that order. d3 walks the kernel
  # window; d2 indexes the second dim of I and O and the leading dim of K. Note
  # that O is indexed as (d0, d2, d1), not in loop order.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d1 * s3 + d3
      * s5)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d2, d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d1)>
  # d0-d2 (all used to index O) are parallel; d3 is the only reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: depthwise_conv_1d_nwc_wcm
  cpp_class_name: DepthwiseConv1DNwcWcmOp
  doc: |-
    Performs depth-wise 1-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1 is the second dim of O; s3 is the leading dim of K; s5 is the last dim
  # of I and is shared with K and O; s6 is the last dim of K and O (presumably
  # the channel multiplier, going by the `wcm` suffix). s2 is the stride and
  # s4 the dilation.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1 * s2 + s3 * s4,
      s5)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s3, s5, s6)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s0, s1, s5, s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6] -> (s4)>
    default_indices:
    - 1
  # Three maps, matching the operands I, K, O in that order. d4 walks the kernel
  # window; d3 indexes only K and O (their last dim), never I.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1 * s2
      + d4 * s4, d2)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d4, d2, d3)>
    - affine_map<(d0, d1, d2, d3, d4)[s0, s1, s2, s3, s4, s5, s6] -> (d0, d1, d2,
      d3)>
  # d0-d3 (the indices of O) are parallel; d4 is the only reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwc
  cpp_class_name: DepthwiseConv2DNhwcHwcOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1/s5 are the two middle dims of O; s3/s7 are the two leading dims of K;
  # s9 is the last dim of I, K and O. s2/s6 are the strides, s4/s8 the
  # dilations.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7, s9)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # Three maps, matching the operands I, K, O in that order. d4/d5 walk the
  # kernel window; d3 indexes the last dim of I, K and O alike.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 (the indices of O) are parallel; d4 and d5 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nchw_chw
  cpp_class_name: DepthwiseConv2DNchwChwOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1 is the second dim of I and O and the leading dim of K; s2/s6 are the two
  # trailing dims of O; s4/s8 are the two trailing dims of K. s3/s7 are the
  # strides, s5/s9 the dilations.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
      * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s1, s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
      s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
      s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
      s9)>
    default_indices:
    - 1
    - 1
  # Three maps, matching the operands I, K, O in that order. d4/d5 walk the
  # kernel window; d3 indexes the second dim of I and O and the leading dim of
  # K. Note that O is indexed as (d0, d3, d1, d2), not in loop order.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d3, d1 * s3 + d4 * s5, d2 * s7 + d5 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d3, d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d3, d1, d2)>
  # d0-d3 (all used to index O) are parallel; d4 and d5 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Body: O = O + cast_signed<U>(I) * cast_signed<U>(K)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwc_q
  cpp_class_name: DepthwiseConv2DNhwcHwcQOp
  doc: |-
    Performs depth-wise 2-D convolution with zero point offsets.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
# Structured definition: operands, indexing maps, iterators and scalar body.
# NOTE(review): this file is generated from core_named_ops.py; the `doc` text
# above should be mirrored there so regeneration does not drop it.
structured_op: !LinalgStructuredOpConfig
  # Shape symbols (read off the maps below): s0 is the leading dim of I and O;
  # s1/s5 are the two middle dims of O; s3/s7 are the two leading dims of K;
  # s9 is the last dim of I, K and O. s2/s6 are the strides, s4/s8 the
  # dilations.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7, s9)>
  # IZp / KZp: scalar I32 zero points subtracted from I and K in the body.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # Five maps, matching the operands I, K, IZp, KZp, O in that order; the two
  # scalar zero points use the empty result list `()`. d4/d5 walk the kernel
  # window; d3 indexes the last dim of I, K and O alike.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 (the indices of O) are parallel; d4 and d5 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # Body: O = O + (cast_signed<U>(I) - cast_signed<U>(IZp))
  #               * (cast_signed<U>(K) - cast_signed<U>(KZp))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# Depth-wise 2-D convolution whose kernel carries one extra trailing axis:
# O[d0, d1, d2, d3, d4] += cast(I) * cast(K), reduced over d5 and d6.
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwcm
  cpp_class_name: DepthwiseConv2DNhwcHwcmOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6 are the strides and s4/s8 the dilations
  # (see the index_attr_map entries). s1/s5 size O's axes 1-2 and s3/s7 size
  # K's axes 0-1, so I's axes 1-2 are s1 * s2 + s3 * s4 and s5 * s6 + s7 * s8.
  # s9 sizes axis 3 of I, axis 2 of K and axis 3 of O; s10 sizes the last axis
  # of K and of O.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s9, s10)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d5, d6, d3, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3, d4)>
  # d0-d4 appear in O's map and are parallel; d5/d6 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = add(O, mul(cast_signed(I -> U), cast_signed(K -> U)))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Variant of the depth-wise 2-D convolution that takes two extra I32 scalar
# operands, IZp and KZp, and subtracts them from I and K before multiplying:
# O[d0, d1, d2, d3, d4] += (cast(I) - cast(IZp)) * (cast(K) - cast(KZp)).
metadata: !LinalgOpMetadata
  name: depthwise_conv_2d_nhwc_hwcm_q
  cpp_class_name: DepthwiseConv2DNhwcHwcmQOp
  doc: |-
    Performs depth-wise 2-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6 are the strides and s4/s8 the dilations
  # (see the index_attr_map entries). s1/s5 size O's axes 1-2 and s3/s7 size
  # K's axes 0-1, so I's axes 1-2 are s1 * s2 + s3 * s4 and s5 * s6 + s7 * s8.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s3,
      s7, s9, s10)>
  # IZp and KZp are scalars, so they have no shape_map and their indexing maps
  # below have an empty result list.
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s5, s9, s10)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s2, s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s4, s8)>
    default_indices:
    - 1
    - 1
  # One indexing map per operand, in the order I, K, IZp, KZp, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d5, d6, d3, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3, d4)>
  # d0-d4 appear in O's map and are parallel; d5/d6 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = add(O, mul(sub(cast(I), cast(IZp)), sub(cast(K), cast(KZp)))), where
  # every cast is cast_signed to type variable U.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
--- !LinalgOpConfig
# Depth-wise 3-D convolution: O[d0, d1, d2, d3, d7] += cast(I) * cast(K),
# reduced over the three kernel-window dims d4, d5 and d6.
# NOTE(review): this file is generated from core_named_ops.py; the O shape fix
# below must also be made in that definition or regeneration will drop it.
metadata: !LinalgOpMetadata
  name: depthwise_conv_3d_ndhwc_dhwc
  cpp_class_name: DepthwiseConv3DNdhwcDhwcOp
  doc: |-
    Performs depth-wise 3-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6/s10 are the strides and s4/s8/s12 the
  # dilations (see the index_attr_map entries). s1/s5/s9 size O's axes 1-3 and
  # s3/s7/s11 size K's axes 0-2; s13 sizes the last axis of I, K and O.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11, s13)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    # Five symbols, one per result of O's indexing map (d0, d1, d2, d3, d7).
    # The trailing s13 is the symbol that also sizes the last axis of I and K,
    # both of which are indexed by d7 as well.
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3
      * s10 + d6 * s12, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d4, d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d7)>
  # d0-d3 and d7 appear in O's map and are parallel; d4-d6 appear only in the
  # I and K maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - parallel
  # O = add(O, mul(cast_signed(I -> U), cast_signed(K -> U)))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 3-D convolution with the d7-indexed axis placed second in I and O
# and first in K: O[d0, d7, d1, d2, d3] += cast(I) * cast(K), reduced over the
# three kernel-window dims d4, d5 and d6.
metadata: !LinalgOpMetadata
  name: depthwise_conv_3d_ncdhw_cdhw
  cpp_class_name: DepthwiseConv3DNcdhwCdhwOp
  doc: |-
    Performs depth-wise 3-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. Multiplier is set to 1
    which is a special case for most depthwise convolutions.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s3/s7/s11 are the strides and s5/s9/s13 the
  # dilations (see the index_attr_map entries). s2/s6/s10 size O's axes 2-4 and
  # s4/s8/s12 size K's axes 1-3; s1 sizes axis 1 of I and O and axis 0 of K.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9, s10 * s11 + s12 * s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s1, s4, s8, s12)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s2, s6, s10)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s3, s7, s11)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s5, s9, s13)>
    default_indices:
    - 1
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d7, d1 * s3 + d4 * s5, d2 * s7 + d5 * s9,
      d3 * s11 + d6 * s13)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d7, d4, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d7, d1, d2, d3)>
  # d0-d3 and d7 appear in O's map and are parallel; d4-d6 appear only in the
  # I and K maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - parallel
  # O = add(O, mul(cast_signed(I -> U), cast_signed(K -> U)))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Depth-wise 3-D convolution whose kernel carries one extra trailing axis:
# O[d0, d1, d2, d3, d8, d4] += cast(I) * cast(K), reduced over the three
# kernel-window dims d5, d6 and d7.
# NOTE(review): this file is generated from core_named_ops.py; the O shape fix
# below must also be made in that definition or regeneration will drop it.
metadata: !LinalgOpMetadata
  name: depthwise_conv_3d_ndhwc_dhwcm
  cpp_class_name: DepthwiseConv3DNdhwcDhwcmOp
  doc: |-
    Performs depth-wise 3-D convolution.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6/s10 are the strides and s4/s8/s12 the
  # dilations (see the index_attr_map entries). s1/s5/s9 size O's axes 1-3 and
  # s3/s7/s11 size K's axes 0-2; s13 sizes the last axis of I and axis 3 of K,
  # and s14 sizes the last axis of K.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12,
      s13)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s3, s7, s11, s13, s14)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    # Six symbols, one per result of O's indexing map (d0, d1, d2, d3, d8, d4).
    # d8 indexes the s13-sized axis of I and K, and d4 the s14-sized axis of K,
    # so O's last two axes are s13 and s14.
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13, s14] -> (s0, s1, s5, s9, s13, s14)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13, s14] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6
      * s8, d3 * s10 + d7 * s12, d8)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d5, d6, d7, d8, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7, d8)[s0, s1, s2, s3, s4, s5, s6,
      s7, s8, s9, s10, s11, s12, s13, s14] -> (d0, d1, d2, d3, d8, d4)>
  # d0-d4 and d8 appear in O's map and are parallel; d5-d7 appear only in the
  # I and K maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  - parallel
  # O = add(O, mul(cast_signed(I -> U), cast_signed(K -> U)))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: I
            - !ScalarExpression
              scalar_fn:
                kind: type
                fn_name: cast_signed
                type_var: U
                operands:
                - !ScalarExpression
                  scalar_arg: K
--- !LinalgOpConfig
# Sum pooling: O[d0, d1, d2, d3] += cast(I), reduced over the two window dims
# d4 and d5.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_sum
  cpp_class_name: PoolingNhwcSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NHWC.
      * Kernel: HW.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6 are the strides and s4/s8 the dilations
  # (see the index_attr_map entries). s1/s5 size O's axes 1-2 and s3/s7 size
  # K's two axes, so I's axes 1-2 are s1 * s2 + s3 * s4 and s5 * s6 + s7 * s8.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s3, s7) and indexing map (d4, d5) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 appear in O's map and are parallel; d4/d5 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = add(O, cast_signed(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Sum pooling with the windowed axes last: O[d0, d1, d2, d3] += cast(I),
# reduced over the two window dims d4 and d5.
metadata: !LinalgOpMetadata
  name: pooling_nchw_sum
  cpp_class_name: PoolingNchwSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NCHW.
      * Kernel: HW.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s3/s7 are the strides and s5/s9 the dilations
  # (see the index_attr_map entries). s2/s6 size O's axes 2-3 and s4/s8 size
  # K's two axes, so I's axes 2-3 are s2 * s3 + s4 * s5 and s6 * s7 + s8 * s9.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
      * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s4, s8) and indexing map (d4, d5) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
      s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
      s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
      s9)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2 * s3 + d4 * s5, d3 * s7 + d5 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 appear in O's map and are parallel; d4/d5 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = add(O, cast_signed(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Signed max pooling: O[d0, d1, d2, d3] = max_signed(O, cast_signed(I)),
# reduced over the two window dims d4 and d5.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_max
  cpp_class_name: PoolingNhwcMaxOp
  doc: |-
    Performs max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6 are the strides and s4/s8 the dilations
  # (see the index_attr_map entries). s1/s5 size O's axes 1-2 and s3/s7 size
  # K's two axes, so I's axes 1-2 are s1 * s2 + s3 * s4 and s5 * s6 + s7 * s8.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s3, s7) and indexing map (d4, d5) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 appear in O's map and are parallel; d4/d5 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = max_signed(O, cast_signed(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Unsigned max pooling: O[d0, d1, d2, d3] = max_unsigned(O, cast_unsigned(I)),
# reduced over the two window dims d4 and d5. Both the cast and the max use
# the unsigned function variants.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_max_unsigned
  cpp_class_name: PoolingNhwcMaxUnsignedOp
  doc: |-
    Performs unsigned max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6 are the strides and s4/s8 the dilations
  # (see the index_attr_map entries). s1/s5 size O's axes 1-2 and s3/s7 size
  # K's two axes, so I's axes 1-2 are s1 * s2 + s3 * s4 and s5 * s6 + s7 * s8.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s3, s7) and indexing map (d4, d5) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 appear in O's map and are parallel; d4/d5 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = max_unsigned(O, cast_unsigned(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Signed max pooling with the windowed axes last:
# O[d0, d1, d2, d3] = max_signed(O, cast_signed(I)), reduced over the two
# window dims d4 and d5.
metadata: !LinalgOpMetadata
  name: pooling_nchw_max
  cpp_class_name: PoolingNchwMaxOp
  doc: |-
    Performs max pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s3/s7 are the strides and s5/s9 the dilations
  # (see the index_attr_map entries). s2/s6 size O's axes 2-3 and s4/s8 size
  # K's two axes, so I's axes 2-3 are s2 * s3 + s4 * s5 and s6 * s7 + s8 * s9.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2
      * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s4, s8) and indexing map (d4, d5) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, s8)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2,
      s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3,
      s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5,
      s9)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2 * s3 + d4 * s5, d3 * s7 + d5 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 appear in O's map and are parallel; d4/d5 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = max_signed(O, cast_signed(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Signed min pooling: O[d0, d1, d2, d3] = min_signed(O, cast_signed(I)),
# reduced over the two window dims d4 and d5.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_min
  cpp_class_name: PoolingNhwcMinOp
  doc: |-
    Performs min pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6 are the strides and s4/s8 the dilations
  # (see the index_attr_map entries). s1/s5 size O's axes 1-2 and s3/s7 size
  # K's two axes, so I's axes 1-2 are s1 * s2 + s3 * s4 and s5 * s6 + s7 * s8.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s3, s7) and indexing map (d4, d5) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 appear in O's map and are parallel; d4/d5 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = min_signed(O, cast_signed(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# Unsigned min pooling: O[d0, d1, d2, d3] = min_unsigned(O, cast_unsigned(I)),
# reduced over the two window dims d4 and d5. Both the cast and the min use
# the unsigned function variants.
metadata: !LinalgOpMetadata
  name: pooling_nhwc_min_unsigned
  cpp_class_name: PoolingNhwcMinUnsignedOp
  doc: |-
    Performs unsigned min pooling.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2/s6 are the strides and s4/s8 the dilations
  # (see the index_attr_map entries). s1/s5 size O's axes 1-2 and s3/s7 size
  # K's two axes, so I's axes 1-2 are s1 * s2 + s3 * s4 and s5 * s6 + s7 * s8.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1 *
      s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s3, s7) and indexing map (d4, d5) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, s7)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s5,
      s9)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2,
      s6)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4,
      s8)>
    default_indices:
    - 1
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d3)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d4, d5)>
    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9]
      -> (d0, d1, d2, d3)>
  # d0-d3 appear in O's map and are parallel; d4/d5 appear only in the I and K
  # maps and are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  # O = min_unsigned(O, cast_unsigned(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
# 1-D sum pooling: O[d0, d1, d2] += cast(I), reduced over the single window
# dim d3.
metadata: !LinalgOpMetadata
  name: pooling_nwc_sum
  cpp_class_name: PoolingNwcSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NWC.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  # Symbols shared by all maps: s2 is the stride and s4 the dilation (see the
  # index_attr_map entries). s1 sizes O's axis 1 and s3 sizes K's only axis,
  # so I's axis 1 is s1 * s2 + s3 * s4.
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  # K is declared as an input but is never read by the assignment below; only
  # its shape (s3) and indexing map (d3) are used here.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # One indexing map per tensor operand, in the order I, K, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 appear in O's map and are parallel; d3 appears only in the I and K
  # maps and is a reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = add(O, cast_signed(I -> U))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_ncw_sum
  cpp_class_name: PoolingNcwSumOp
  doc: |-
    Performs sum pooling.

    Layout:
      * Input: NCW.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# Symbols shared by every map in this op:
#   s0, s1, s2 - extents of the output O
#   s4         - extent of the window operand K
#   s3, s5     - values of the `strides` and `dilations` attributes
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its leading extents (s0, s1) match those of O; the trailing
  # extent combines s2 with the window, stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
  # Attribute bound to s3; defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
    default_indices:
    - 1
  # Attribute bound to s5; defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
    default_indices:
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, d1, d2, d3): d0-d2 address O directly, d3 walks K, and I is read at
  # (d0, d1, d2 * s3 + d3 * s5).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3
      * s5)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 are parallel; the window dimension d3 is a reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = add(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_nwc_max
  cpp_class_name: PoolingNwcMaxOp
  doc: |-
    Performs max pooling.

    Layout:
      * Input: NWC.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with
# pooling_nwc_sum, whose operand shape maps are identical. This file is
# generated from core_named_ops.py, so mirror the docstring change there.
#
# Symbols shared by every map in this op:
#   s0, s1, s5 - extents of the output O
#   s3         - extent of the window operand K
#   s2, s4     - values of the `strides` and `dilations` attributes
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its outer extents (s0, s5) match those of O; the middle extent
  # combines s1 with the window, stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  # Attribute bound to s2; defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  # Attribute bound to s4; defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, d1, d2, d3): d0-d2 address O directly, d3 walks K, and I is read at
  # (d0, d1 * s2 + d3 * s4, d2).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 are parallel; the window dimension d3 is a reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = max_signed(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_nwc_max_unsigned
  cpp_class_name: PoolingNwcMaxUnsignedOp
  doc: |-
    Performs unsigned max pooling.

    Layout:
      * Input: NWC.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with
# pooling_nwc_sum, whose operand shape maps are identical. This file is
# generated from core_named_ops.py, so mirror the docstring change there.
#
# Symbols shared by every map in this op:
#   s0, s1, s5 - extents of the output O
#   s3         - extent of the window operand K
#   s2, s4     - values of the `strides` and `dilations` attributes
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its outer extents (s0, s5) match those of O; the middle extent
  # combines s1 with the window, stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  # Attribute bound to s2; defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  # Attribute bound to s4; defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, d1, d2, d3): d0-d2 address O directly, d3 walks K, and I is read at
  # (d0, d1 * s2 + d3 * s4, d2).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 are parallel; the window dimension d3 is a reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = max_unsigned(O, cast_unsigned(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_ncw_max
  cpp_class_name: PoolingNcwMaxOp
  doc: |-
    Performs max pooling.

    Layout:
      * Input: NCW.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with
# pooling_ncw_sum, whose operand shape maps are identical. This file is
# generated from core_named_ops.py, so mirror the docstring change there.
#
# Symbols shared by every map in this op:
#   s0, s1, s2 - extents of the output O
#   s4         - extent of the window operand K
#   s3, s5     - values of the `strides` and `dilations` attributes
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its leading extents (s0, s1) match those of O; the trailing
  # extent combines s2 with the window, stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)>
  # Attribute bound to s3; defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
    default_indices:
    - 1
  # Attribute bound to s5; defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)>
    default_indices:
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, d1, d2, d3): d0-d2 address O directly, d3 walks K, and I is read at
  # (d0, d1, d2 * s3 + d3 * s5).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3
      * s5)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 are parallel; the window dimension d3 is a reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = max_signed(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_nwc_min
  cpp_class_name: PoolingNwcMinOp
  doc: |-
    Performs min pooling.

    Layout:
      * Input: NWC.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with
# pooling_nwc_sum, whose operand shape maps are identical. This file is
# generated from core_named_ops.py, so mirror the docstring change there.
#
# Symbols shared by every map in this op:
#   s0, s1, s5 - extents of the output O
#   s3         - extent of the window operand K
#   s2, s4     - values of the `strides` and `dilations` attributes
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its outer extents (s0, s5) match those of O; the middle extent
  # combines s1 with the window, stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  # Attribute bound to s2; defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  # Attribute bound to s4; defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, d1, d2, d3): d0-d2 address O directly, d3 walks K, and I is read at
  # (d0, d1 * s2 + d3 * s4, d2).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 are parallel; the window dimension d3 is a reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = min_signed(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_nwc_min_unsigned
  cpp_class_name: PoolingNwcMinUnsignedOp
  doc: |-
    Performs unsigned min pooling.

    Layout:
      * Input: NWC.
      * Kernel: W.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with
# pooling_nwc_sum, whose operand shape maps are identical. This file is
# generated from core_named_ops.py, so mirror the docstring change there.
#
# Symbols shared by every map in this op:
#   s0, s1, s5 - extents of the output O
#   s3         - extent of the window operand K
#   s2, s4     - values of the `strides` and `dilations` attributes
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its outer extents (s0, s5) match those of O; the middle extent
  # combines s1 with the window, stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)>
  # Attribute bound to s2; defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)>
    default_indices:
    - 1
  # Attribute bound to s4; defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)>
    default_indices:
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, d1, d2, d3): d0-d2 address O directly, d3 walks K, and I is read at
  # (d0, d1 * s2 + d3 * s4, d2).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4,
      d2)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)>
    - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)>
  # d0-d2 are parallel; the window dimension d3 is a reduction.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - reduction
  # O = min_unsigned(O, cast_unsigned(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_unsigned
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_unsigned
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_ndhwc_sum
  cpp_class_name: PoolingNdhwcSumOp
  doc: |-
    Performs 3D sum pooling.

    Layout:
      * Input: NDHWC.
      * Kernel: DHW.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with the
# 1-D sum pooling ops (e.g. pooling_nwc_sum), which document their layout. This
# file is generated from core_named_ops.py, so mirror the docstring change
# there.
#
# Symbols shared by every map in this op:
#   s0, s1, s5, s9, s13 - extents of the output O
#   s3, s7, s11         - extents of the window operand K
#   s2, s6, s10         - values of the `strides` attribute
#   s4, s8, s12         - values of the `dilations` attribute
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its outermost extents (s0, s13) match those of O; each of the
  # three inner extents combines an output extent with the matching window,
  # stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  # Attribute bound to (s2, s6, s10); every entry defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  # Attribute bound to (s4, s8, s12); every entry defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, ..., d7): d0-d4 address O directly, d5-d7 walk K, and I is read at
  # (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3 * s10 + d7 * s12, d4).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
      * s10 + d7 * s12, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
  # d0-d4 are parallel; the window dimensions d5-d7 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # O = add(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_ndhwc_max
  cpp_class_name: PoolingNdhwcMaxOp
  doc: |-
    Performs 3D max pooling.

    Layout:
      * Input: NDHWC.
      * Kernel: DHW.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with the
# 1-D sum pooling ops (e.g. pooling_nwc_sum), which document their layout. This
# file is generated from core_named_ops.py, so mirror the docstring change
# there.
#
# Symbols shared by every map in this op:
#   s0, s1, s5, s9, s13 - extents of the output O
#   s3, s7, s11         - extents of the window operand K
#   s2, s6, s10         - values of the `strides` attribute
#   s4, s8, s12         - values of the `dilations` attribute
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its outermost extents (s0, s13) match those of O; each of the
  # three inner extents combines an output extent with the matching window,
  # stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  # Attribute bound to (s2, s6, s10); every entry defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  # Attribute bound to (s4, s8, s12); every entry defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, ..., d7): d0-d4 address O directly, d5-d7 walk K, and I is read at
  # (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3 * s10 + d7 * s12, d4).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
      * s10 + d7 * s12, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
  # d0-d4 are parallel; the window dimensions d5-d7 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # O = max_signed(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: max_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: pooling_ndhwc_min
  cpp_class_name: PoolingNdhwcMinOp
  doc: |-
    Performs 3D min pooling.

    Layout:
      * Input: NDHWC.
      * Kernel: DHW.

    Numeric casting is performed on the input operand, promoting it to the same
    data type as the accumulator/output.
  implements:
  - LinalgConvolutionOpInterface
# NOTE(review): the Layout section in `doc` was added for consistency with the
# 1-D sum pooling ops (e.g. pooling_nwc_sum), which document their layout. This
# file is generated from core_named_ops.py, so mirror the docstring change
# there.
#
# Symbols shared by every map in this op:
#   s0, s1, s5, s9, s13 - extents of the output O
#   s3, s7, s11         - extents of the window operand K
#   s2, s6, s10         - values of the `strides` attribute
#   s4, s8, s12         - values of the `dilations` attribute
structured_op: !LinalgStructuredOpConfig
  args:
  # Input tensor. Its outermost extents (s0, s13) match those of O; each of the
  # three inner extents combines an output extent with the matching window,
  # stride and dilation symbols.
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9 * s10 + s11 * s12, s13)>
  # Window operand. The assignment at the bottom references only O and I, never
  # the elements of K.
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s3, s7, s11)>
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
      s13] -> (s0, s1, s5, s9, s13)>
  # Attribute bound to (s2, s6, s10); every entry defaults to 1.
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s2, s6, s10)>
    default_indices:
    - 1
    - 1
    - 1
  # Attribute bound to (s4, s8, s12); every entry defaults to 1.
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
      s12, s13] -> (s4, s8, s12)>
    default_indices:
    - 1
    - 1
    - 1
  # One map per tensor operand, in the order I, K, O, over the iteration space
  # (d0, ..., d7): d0-d4 address O directly, d5-d7 walk K, and I is read at
  # (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3 * s10 + d7 * s12, d4).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1 * s2 + d5 * s4, d2 * s6 + d6 * s8, d3
      * s10 + d7 * s12, d4)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d5, d6, d7)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
      s8, s9, s10, s11, s12, s13] -> (d0, d1, d2, d3, d4)>
  # d0-d4 are parallel; the window dimensions d5-d7 are reductions.
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # O = min_signed(O, cast_signed(U, I))
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: min_signed
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: type
            fn_name: cast_signed
            type_var: U
            operands:
            - !ScalarExpression
              scalar_arg: I
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: fill
  cpp_class_name: FillOp
  doc: |-
    Fills the output tensor with the given value.

    Works for arbitrary ranked output tensors since the operation performs scalar
    accesses only and is thus rank polymorphic. Numeric casting is performed on
    the value operand, promoting it to the same data type as the output.
  implements:
  - LinalgFillOpInterface
  defines:
  - hasCanonicalizer
# This op declares no symbols and no iteration dimensions: every map below is
# rank-0, in line with the "scalar accesses only" note in `doc`.
structured_op: !LinalgStructuredOpConfig
  args:
  # The fill value is a scalar operand, so it carries no shape_map.
  - !LinalgOperandDefConfig
    name: value
    kind: scalar
    type_var: T1
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<() -> ()>
  # One map per operand, in the order value, O.
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<() -> ()>
    - affine_map<() -> ()>
  iterator_types: []
  # O = cast_signed(U, value)
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: type
        fn_name: cast_signed
        type_var: U
        operands:
        - !ScalarExpression
          scalar_arg: value
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: fill_rng_2d
  cpp_class_name: FillRng2DOp
  doc: |-
    Fills the output tensor with pseudo random numbers.

    The operation generates pseudo random numbers using a linear congruential
    generator. It provides no guarantees regarding the distribution of the
    generated random numbers. Instead of generating the random numbers
    sequentially, it instantiates one random number generator per data element
    and runs them in parallel. The seed operand and the indices of the data
    element seed the random number generation. The min and max operands limit
    the range of the generated random numbers.
# NOTE(review): the `doc` text above fixes a typo ("generations" ->
# "generates"). This file is generated from core_named_ops.py, so mirror the
# docstring change there.
#
# Symbols: s0 and s1 are the two extents of the output O.
structured_op: !LinalgStructuredOpConfig
  args:
  # Three scalar operands (no shape_map) followed by the 2-D output.
  - !LinalgOperandDefConfig
    name: min
    kind: scalar
    type_var: F64
  - !LinalgOperandDefConfig
    name: max
    kind: scalar
    type_var: F64
  - !LinalgOperandDefConfig
    name: seed
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: T
    shape_map: affine_map<()[s0, s1] -> (s0, s1)>
  # One map per operand, in the order min, max, seed, O. The scalars use rank-0
  # results; O is addressed at (d0, d1).
  indexing_maps: !LinalgIndexingMapsConfig
    static_indexing_maps:
    - affine_map<(d0, d1)[s0, s1] -> ()>
    - affine_map<(d0, d1)[s0, s1] -> ()>
    - affine_map<(d0, d1)[s0, s1] -> ()>
    - affine_map<(d0, d1)[s0, s1] -> (d0, d1)>
  # Both dimensions are parallel; there is no reduction.
  iterator_types:
  - parallel
  - parallel
  # The expression tree below, written inside-out (r1 and r2 are names used only
  # in these comments):
  #   r1 = (cast_signed(I32, index 0) + seed) * 1103515245 + 12345
  #   r2 = (cast_signed(I32, index 1) + r1)   * 1103515245 + 12345
  #   O  = cast_signed(T, (2147483647 + cast_signed(F64, r2))
  #                       * ((max - min) * 2.3283063999999999E-10) + min)
  # The integer constants are spelled as i64 and go through cast_signed to I32
  # (multiplier, increment) or F64 (2147483647) before use.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: type
        fn_name: cast_signed
        type_var: T
        operands:
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: add
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: mul
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: binary
                    fn_name: add
                    operands:
                    # Constant 2147483647, cast to F64.
                    - !ScalarExpression
                      scalar_fn:
                        kind: type
                        fn_name: cast_signed
                        type_var: F64
                        operands:
                        - !ScalarExpression
                          scalar_const: '2147483647 : i64'
                    # r2 (second multiply-add round), cast to F64.
                    - !ScalarExpression
                      scalar_fn:
                        kind: type
                        fn_name: cast_signed
                        type_var: F64
                        operands:
                        - !ScalarExpression
                          scalar_fn:
                            kind: binary
                            fn_name: add
                            operands:
                            - !ScalarExpression
                              scalar_fn:
                                kind: binary
                                fn_name: mul
                                operands:
                                - !ScalarExpression
                                  scalar_fn:
                                    kind: binary
                                    fn_name: add
                                    operands:
                                    - !ScalarExpression
                                      scalar_fn:
                                        kind: type
                                        fn_name: cast_signed
                                        type_var: I32
                                        operands:
                                        - !ScalarExpression
                                          scalar_index: 1
                                    # r1 (first multiply-add round), built from
                                    # index 0 and the `seed` operand.
                                    - !ScalarExpression
                                      scalar_fn:
                                        kind: binary
                                        fn_name: add
                                        operands:
                                        - !ScalarExpression
                                          scalar_fn:
                                            kind: binary
                                            fn_name: mul
                                            operands:
                                            - !ScalarExpression
                                              scalar_fn:
                                                kind: binary
                                                fn_name: add
                                                operands:
                                                - !ScalarExpression
                                                  scalar_fn:
                                                    kind: type
                                                    fn_name: cast_signed
                                                    type_var: I32
                                                    operands:
                                                    - !ScalarExpression
                                                      scalar_index: 0
                                                - !ScalarExpression
                                                  scalar_arg: seed
                                            - !ScalarExpression
                                              scalar_fn:
                                                kind: type
                                                fn_name: cast_signed
                                                type_var: I32
                                                operands:
                                                - !ScalarExpression
                                                  scalar_const: '1103515245 : i64'
                                        - !ScalarExpression
                                          scalar_fn:
                                            kind: type
                                            fn_name: cast_signed
                                            type_var: I32
                                            operands:
                                            - !ScalarExpression
                                              scalar_const: '12345 : i64'
                                - !ScalarExpression
                                  scalar_fn:
                                    kind: type
                                    fn_name: cast_signed
                                    type_var: I32
                                    operands:
                                    - !ScalarExpression
                                      scalar_const: '1103515245 : i64'
                            - !ScalarExpression
                              scalar_fn:
                                kind: type
                                fn_name: cast_signed
                                type_var: I32
                                operands:
                                - !ScalarExpression
                                  scalar_const: '12345 : i64'
                # Scaling factor: (max - min) * 2.3283063999999999E-10.
                - !ScalarExpression
                  scalar_fn:
                    kind: binary
                    fn_name: mul
                    operands:
                    - !ScalarExpression
                      scalar_fn:
                        kind: binary
                        fn_name: sub
                        operands:
                        - !ScalarExpression
                          scalar_arg: max
                        - !ScalarExpression
                          scalar_arg: min
                    - !ScalarExpression
                      scalar_fn:
                        kind: type
                        fn_name: cast_signed
                        type_var: F64
                        operands:
                        - !ScalarExpression
                          scalar_const: '2.3283063999999999E-10 : f64'
            # Finally the scaled value is shifted by `min`.
            - !ScalarExpression
              scalar_arg: min