Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
58e82b8
conv:tf32:add more instances
yingluAMD Sep 16, 2025
45d0057
add instances of device_grouped_conv_fwd_xdl_f32_comp_instances
yingluAMD Sep 18, 2025
823ee07
add instances of device_grouped_conv_fwd_xdl_f32_tf32_mem_instances
yingluAMD Sep 18, 2025
255a25d
add instances of device_grouped_conv_fwd_xdl_large_tensor_f32_tf32_instances
yingluAMD Sep 18, 2025
58a3fa1
review
yingluAMD Sep 18, 2025
7f6962e
tf32:conv:add instances for base class DeviceConvFwd
yingluAMD Sep 19, 2025
ddfc65d
tf32:conv:add instances for base class DeviceGroupedConvBwdDataMultipleD
yingluAMD Sep 19, 2025
de9a550
tf32:conv:add instances for base class DeviceGroupedConvBwdWeight
yingluAMD Sep 22, 2025
b3db6c1
self review
yingluAMD Sep 23, 2025
b3bb54f
add tf32 in profiler
yingluAMD Sep 23, 2025
623a991
Merge branch 'develop' into tf32_instance_0919
yingluAMD Sep 25, 2025
7a653cd
remove useless instances
yingluAMD Sep 23, 2025
040aee6
remove gnhwc/ngchw/ngcdhw instances
yingluAMD Sep 24, 2025
a8d9fbe
remove useless bwd instances
yingluAMD Sep 25, 2025
94da54b
change check_err for tf32
yingluAMD Sep 25, 2025
f54bab1
fix clang-format fail
yingluAMD Sep 25, 2025
374e6bb
remove non-ndhwgc/nhwgc/nhwc instances
yingluAMD Sep 26, 2025
6f66571
complement ndhwgc instances
yingluAMD Sep 26, 2025
1be39fc
update copyright datetime
yingluAMD Sep 26, 2025
a1b65ec
add check in IsSupportedArgument()
yingluAMD Sep 29, 2025
735034e
Merge branch 'develop' into tf32_instance_0919
yingluAMD Oct 9, 2025
bea113c
add similar instances as new bwd instances
yingluAMD Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions example/17_convnd_bwd_data/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@ if(result EQUAL 0)
target_link_libraries(example_convnd_bwd_data_xdl_fp16 PRIVATE utility)
endif()

# Plain FP32 example: built for every target configuration.
add_example_executable(example_convnd_bwd_data_xdl_fp32 convnd_bwd_data_xdl_fp32.cpp)
if(result EQUAL 0)
    target_link_libraries(example_convnd_bwd_data_xdl_fp32 PRIVATE utility)
endif()

# TF32 example: only meaningful on GPUs with TF32 MFMA support, so gate it on
# the target list. `target` acts as a once-only latch so the executable is
# added a single time even if several matching GPUs appear in GPU_TARGETS.
list(APPEND gpu_list gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
    if(gpu IN_LIST gpu_list AND target EQUAL 0)
        add_example_executable(example_convnd_bwd_data_xdl_fp32_tf32 convnd_bwd_data_xdl_fp32_tf32.cpp)
        if(result EQUAL 0)
            target_link_libraries(example_convnd_bwd_data_xdl_fp32_tf32 PRIVATE utility)
        endif()
        set(target 1)
    endif()
endforeach()

add_example_executable(example_convnd_bwd_data_dl_fp16 convnd_bwd_data_dl_fp16.cpp)
if(result EQUAL 0)
target_link_libraries(example_convnd_bwd_data_dl_fp16 PRIVATE utility)
Expand Down
9 changes: 7 additions & 2 deletions example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ template <ck::index_t NDimSpatial,
typename InElementOp,
typename WeiElementOp,
typename OutElementOp,
typename DeviceConvNdBwdDataInstance>
typename DeviceConvNdBwdDataInstance,
typename ComputeDataType = OutDataType>
int run_conv_bwd_data(bool do_verification,
int init_method,
bool time_kernel,
Expand Down Expand Up @@ -150,7 +151,11 @@ int run_conv_bwd_data(bool do_verification,
OutDataType,
InElementOp,
WeiElementOp,
OutElementOp>();
OutElementOp,
0,
0,
0,
ComputeDataType>();

auto ref_invoker = ref_conv.MakeInvoker();

Expand Down
207 changes: 207 additions & 0 deletions example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.

#include "convnd_bwd_data_common.hpp"

#include "ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp"

// All tensors (input gradient, weights, output gradient) and the GEMM
// accumulator use plain FP32 in this example.
using InDataType  = float;
using WeiDataType = float;
using OutDataType = float;
using AccDataType = float;

// Shorthand for compile-time integer sequences used in the tuning parameters.
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;

// No elementwise post-op on any tensor: identity pass-through everywhere.
using InElementOp  = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;

// Default specialization: no filter-1x1/stride-1 shortcut is assumed.
static constexpr auto ConvBwdDefault =
    ck::tensor_operation::device::ConvolutionBackwardDataSpecialization::Default;

// XDL (MFMA) backward-data convolution instance, parameterized only by the
// number of spatial dimensions (1D/2D/3D). The remaining arguments are the
// block/wave-level tuning parameters of the kernel.
template <ck::index_t NDimSpatial>
using DeviceConvNdBwdDataInstance = ck::tensor_operation::device::DeviceConvNdBwdDataNwcKxcNwk_Xdl<
    NDimSpatial, // NDimSpatial
    InDataType,  // InDataType
    WeiDataType, // WeiDataType
    OutDataType, // OutDataType
    AccDataType, // AccDataType
    InElementOp, // InElementwiseOperation
    WeiElementOp, // WeiElementwiseOperation
    OutElementOp, // OutElementwiseOperation
    ConvBwdDefault, // ConvolutionBackwardDataSpecialization
    256, // BlockSize
    128, // MPerBlock
    128, // NPerBlock
    4, // K0PerBlock
    8, // K1
    32, // MPerXdl
    32, // NPerXdl
    2, // MXdlPerWave
    2, // NXdlPerWave
    S<4, 64, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
    S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
    S<1, 0, 2>, // ABlockTransferSrcAccessOrder
    2, // ABlockTransferSrcVectorDim
    8, // ABlockTransferSrcScalarPerVector
    8, // ABlockTransferDstScalarPerVector_K1
    true, // ABlockLdsAddExtraM
    S<4, 64, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
    S<2, 0, 1>, // BBlockTransferThreadClusterArrangeOrder
    S<0, 2, 1>, // BBlockTransferSrcAccessOrder
    1, // BBlockTransferSrcVectorDim
    2, // BBlockTransferSrcScalarPerVector
    8, // BBlockTransferDstScalarPerVector_K1
    true, // BBlockLdsAddExtraN
    7, // CThreadTransferSrcDstVectorDim (presumably; matches sibling CK examples — verify)
    1>; // GemmCThreadTransferDstScalarPerVector

/// Example driver: runs XDL backward-data convolution in FP32 for 1D, 2D or
/// 3D spatial problems (GNWC / GNHWC / GNDHWC layouts).
///
/// Usage:
///   no args              -> default 2D problem, verification on
///   3 args               -> do_verification init_method time_kernel
///   5+ args              -> the 3 flags, then num_dim_spatial and the conv
///                           problem description (parsed from argv[5...])
int main(int argc, char* argv[])
{
    namespace ctc = ck::tensor_layout::convolution;

    print_helper_msg();

    bool do_verification = true;
    int init_method      = 1;
    bool time_kernel     = false;

    // Default problem: 2D, G=1, N=128, K=256, C=256, 3x3 filter, 71x71 input.
    ck::utils::conv::ConvParam conv_param{
        2, 1, 128, 256, 256, {3, 3}, {71, 71}, {2, 2}, {1, 1}, {1, 1}, {1, 1}};

    if(argc == 1)
    {
        // use default
    }
    else if(argc == 4)
    {
        do_verification = std::stoi(argv[1]);
        init_method     = std::stoi(argv[2]);
        time_kernel     = std::stoi(argv[3]);
    }
    else if(argc >= 5)
    {
        do_verification = std::stoi(argv[1]);
        init_method     = std::stoi(argv[2]);
        time_kernel     = std::stoi(argv[3]);

        const ck::index_t num_dim_spatial = std::stoi(argv[4]);

        conv_param = ck::utils::conv::parse_conv_param(num_dim_spatial, 5, argv);
    }
    else
    {
        // Bug fix: the original code fell into the parsing branch for
        // argc == 2 or 3 and read argv[2]/argv[3]/argv[4] out of bounds (UB).
        std::cerr << "wrong number of arguments" << std::endl;
        return 1;
    }

    const auto in_element_op  = InElementOp{};
    const auto wei_element_op = WeiElementOp{};
    const auto out_element_op = OutElementOp{};

    if(conv_param.num_dim_spatial_ == 1)
    {
        using InLayout  = ctc::GNWC;
        using WeiLayout = ctc::GKXC;
        using OutLayout = ctc::GNWK;

        // Build packed host tensor descriptors for the chosen layouts.
        const auto in_g_n_c_wis_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
                conv_param);

        const auto wei_g_k_c_xs_desc =
            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
                conv_param);

        const auto out_g_n_k_wos_desc =
            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
                conv_param);

        return run_conv_bwd_data<1,
                                 InDataType,
                                 WeiDataType,
                                 OutDataType,
                                 InElementOp,
                                 WeiElementOp,
                                 OutElementOp,
                                 DeviceConvNdBwdDataInstance<1>>(do_verification,
                                                                 init_method,
                                                                 time_kernel,
                                                                 conv_param,
                                                                 in_g_n_c_wis_desc,
                                                                 wei_g_k_c_xs_desc,
                                                                 out_g_n_k_wos_desc,
                                                                 in_element_op,
                                                                 wei_element_op,
                                                                 out_element_op);
    }
    else if(conv_param.num_dim_spatial_ == 2)
    {
        using InLayout  = ctc::GNHWC;
        using WeiLayout = ctc::GKYXC;
        using OutLayout = ctc::GNHWK;

        const auto in_g_n_c_wis_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
                conv_param);

        const auto wei_g_k_c_xs_desc =
            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
                conv_param);

        const auto out_g_n_k_wos_desc =
            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
                conv_param);

        return run_conv_bwd_data<2,
                                 InDataType,
                                 WeiDataType,
                                 OutDataType,
                                 InElementOp,
                                 WeiElementOp,
                                 OutElementOp,
                                 DeviceConvNdBwdDataInstance<2>>(do_verification,
                                                                 init_method,
                                                                 time_kernel,
                                                                 conv_param,
                                                                 in_g_n_c_wis_desc,
                                                                 wei_g_k_c_xs_desc,
                                                                 out_g_n_k_wos_desc,
                                                                 in_element_op,
                                                                 wei_element_op,
                                                                 out_element_op);
    }
    else if(conv_param.num_dim_spatial_ == 3)
    {
        using InLayout  = ctc::GNDHWC;
        using WeiLayout = ctc::GKZYXC;
        using OutLayout = ctc::GNDHWK;

        const auto in_g_n_c_wis_desc =
            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
                conv_param);

        const auto wei_g_k_c_xs_desc =
            ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(
                conv_param);

        const auto out_g_n_k_wos_desc =
            ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(
                conv_param);

        return run_conv_bwd_data<3,
                                 InDataType,
                                 WeiDataType,
                                 OutDataType,
                                 InElementOp,
                                 WeiElementOp,
                                 OutElementOp,
                                 DeviceConvNdBwdDataInstance<3>>(do_verification,
                                                                 init_method,
                                                                 time_kernel,
                                                                 conv_param,
                                                                 in_g_n_c_wis_desc,
                                                                 wei_g_k_c_xs_desc,
                                                                 out_g_n_k_wos_desc,
                                                                 in_element_op,
                                                                 wei_element_op,
                                                                 out_element_op);
    }

    // Unsupported num_dim_spatial: keep the original exit status (0) for
    // backward compatibility, although no convolution was run.
    return 0;
}
Loading
Loading