35 template <CudnnFindSamplingTechnique samplingTechnique>
39 std::map<float, ExecutionPlan &> timed_execution_plans;
41 const int maxIterCount =
45 const float threshhold = 0.95f;
47 cudaEvent_t start, stop;
48 cudaEventCreate(&start);
49 cudaEventCreate(&stop);
50 cudaDeviceSynchronize();
52 for (
auto &plan : plans) {
54 float final_time_ms = 0.0f;
55 float min_time_ms = std::numeric_limits<float>::max();
58 ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
59 cudaDeviceSynchronize();
61 for (
int i = 0; i < maxIterCount; i++) {
62 cudaEventRecord(start);
64 ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
66 cudaEventRecord(stop);
67 cudaEventSynchronize(stop);
68 cudaEventElapsedTime(&time_ms, start, stop);
71 final_time_ms = std::min(min_time_ms, time_ms);
72 if (time_ms / min_time_ms < threshhold) {
73 min_time_ms = final_time_ms;
78 final_time_ms = i == (maxIterCount / 2) ? time_ms : final_time_ms;
81 timed_execution_plans.insert({final_time_ms, plan});
84 timed_execution_plans.begin(),
85 timed_execution_plans.end(),
86 std::back_inserter(time_sorted_plans),
87 [](
const std::map<float, cudnn_frontend::ExecutionPlan &>::value_type &pair) ->
struct executionOption {
88 return {std::move(pair.second), pair.first};
91 cudaEventDestroy(start);
92 cudaEventDestroy(stop);
94 return time_sorted_plans;
97 template <CudnnFindSamplingTechnique samplingTechnique>
105 for (
auto &engine_config : generate_engine_config(opGraph)) {
106 #ifndef NV_CUDNN_DISABLE_EXCEPTION 111 #ifndef NV_CUDNN_DISABLE_EXCEPTION 117 return time_sorted_plan<samplingTechnique>(handle,
filter(pred, plans), variantPack);
Sample three times and take the median.
Sample multiple times until the measured time is stable.
std::function< bool(cudnn_frontend::ExecutionPlan const &plan)> Predicate
std::vector< cudnn_frontend::ExecutionPlan > executionPlans_t
std::vector< struct executionOption > executionOptions_t
Variety of renames.
auto filter(Predicate pred, executionPlans_t &plans) -> executionPlans_t
auto time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack &variantPack) -> executionOptions_t
auto cudnnFindPlan(cudnnHandle_t handle, cudnn_frontend::OperationGraph &&opGraph, cudnn_frontend::VariantPack &variantPack, Predicate pred) -> executionOptions_t
Sample once: quick, but may produce unstable values.