Route triangular_solve.X through the op_api (aclnn) code generator.

What this patch changes:
  * op_plugin_functions.yaml: adds `op_api: all_version` and a `gen_opapi`
    section to the existing triangular_solve.X entry. Outputs X and M keep
    their own dtypes and are sized from elements 0 and 1 of `output_sizes`,
    which is computed by op_infer::triangular_solve_output_size(self, A).
    The generated call executes aclnnTriangularSolve.
  * KernelNpuOutputSize.cpp / .h: adds op_infer::triangular_solve_output_size,
    which calls at::native::_linalg_broadcast_batch_dims(self, A,
    "triangular_solve") and returns the sizes of the two resulting tensors,
    in that order, as a tuple of small vectors.

NOTE(review): this copy of the patch was rebuilt from a flattened version in
which line breaks, indentation and template argument lists had been lost.
Blank-line placement follows the hunk line counts; `<int64_t, SIZE>` was
restored because it makes the truncated hunk-header context exactly 80
columns wide; `c10::optional<at::Tensor>` and the indentation depth follow
the surrounding conventions. Confirm against the repository before applying.
NOTE(review): presumably KernelNpuOutputSize.cpp already includes the ATen
header that declares _linalg_broadcast_batch_dims; this patch adds no
#include, so verify.

diff --git a/op_plugin/config/op_plugin_functions.yaml b/op_plugin/config/op_plugin_functions.yaml
index 8603580f844d2d466ad0c5701af1c95d8a2f8e9f..06054104580d5540d35e6ede011fe4992da1e2e3 100644
--- a/op_plugin/config/op_plugin_functions.yaml
+++ b/op_plugin/config/op_plugin_functions.yaml
@@ -4996,6 +4996,17 @@ official:
 
   - func: triangular_solve.X(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False, *, Tensor(a!) X, Tensor(b!) M) -> (Tensor(a!) solution, Tensor(b!) cloned_coefficient)
     acl_op: all_version
+    op_api: all_version
+    gen_opapi:
+      X:
+        size: 'std::get<0>(output_sizes)'
+        dtype: X
+      M:
+        size: 'std::get<1>(output_sizes)'
+        dtype: M
+      new_params:
+        output_sizes: 'op_infer::triangular_solve_output_size(self, A)'
+      exec: aclnnTriangularSolve
 
   - func: tril(Tensor self, int diagonal=0) -> Tensor
     acl_op: all_version
diff --git a/op_plugin/utils/KernelNpuOutputSize.cpp b/op_plugin/utils/KernelNpuOutputSize.cpp
index ef7c772130e83a608eb23d0aa8cda77f8eb05900..58d83c5e148aea9eb340f90475d8ecbe4fdb26d7 100644
--- a/op_plugin/utils/KernelNpuOutputSize.cpp
+++ b/op_plugin/utils/KernelNpuOutputSize.cpp
@@ -2174,4 +2174,14 @@ c10::SmallVector<c10::SmallVector<int64_t, SIZE>, SIZE> split_with_sizes_copy_ou
     return output_shapes;
 }
 
+std::tuple<c10::SmallVector<int64_t, SIZE>, c10::SmallVector<int64_t, SIZE>>
+triangular_solve_output_size(const at::Tensor& self, const at::Tensor& A)
+{
+    auto result = at::native::_linalg_broadcast_batch_dims(self, A, "triangular_solve");
+    return std::make_tuple(
+        array_to_small_vector(std::get<0>(result).sizes()),
+        array_to_small_vector(std::get<1>(result).sizes())
+    );
+}
+
 } // namespace op_infer
diff --git a/op_plugin/utils/KernelNpuOutputSize.h b/op_plugin/utils/KernelNpuOutputSize.h
index 163b7e80e12e4c0d9e842c609d84e883ed4498ca..a2906e717d52da029a3bea829fe0188681d6f26c 100644
--- a/op_plugin/utils/KernelNpuOutputSize.h
+++ b/op_plugin/utils/KernelNpuOutputSize.h
@@ -375,5 +375,9 @@ OP_PLUGIN_HIDDEN c10::SmallVector<int64_t, SIZE> npu_moe_token_permute_grad_out_
 OP_PLUGIN_HIDDEN c10::SmallVector<int64_t, SIZE> npu_moe_token_unpermute_grad_permuted_tokens_out_size(const at::Tensor &permuted_tokens, const at::Tensor &grad_unpermuted_tokens, const at::Tensor &sorted_indices, const c10::optional<at::Tensor> &probs);
 OP_PLUGIN_HIDDEN c10::SmallVector<int64_t, SIZE> npu_moe_token_unpermute_grad_probs_out_size(const at::Tensor &permuted_tokens, const at::Tensor &grad_unpermuted_tokens, const at::Tensor &sorted_indices, const c10::optional<at::Tensor> &probs);
 OP_PLUGIN_HIDDEN c10::SmallVector<c10::SmallVector<int64_t, SIZE>, SIZE> split_with_sizes_copy_output_size(const c10::SmallVector<int64_t, SIZE>& input_shape, const c10::IntArrayRef split_sizes, int64_t dim);
+
+OP_PLUGIN_HIDDEN std::tuple<c10::SmallVector<int64_t, SIZE>, c10::SmallVector<int64_t, SIZE>>
+triangular_solve_output_size(const at::Tensor& self, const at::Tensor& A);
+
 } // namespace op_infer
 #endif // OP_PLUGIN_UTILS_KERNEL_NPU_INFER_SHAPE