Skip to content

Commit

Permalink
chore: remove unused `compute_type` variables
Browse files (browse the repository at this point in the history)
  • Loading branch information
chenzhuofu committed Nov 29, 2024
1 parent d09124c commit 115a3ff
Showing 1 changed file with 0 additions and 19 deletions.
19 changes: 0 additions & 19 deletions src/ops/kernels/inc_multihead_self_attention_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,6 @@ void compute_qkv(IncMultiHeadSelfAttentionMeta const *m,
checkCUDA(cublasSetStream(m->handle.blas, stream));
checkCUDNN(cudnnSetStream(m->handle.dnn, stream));
cudaDataType_t cublas_data_type = ff_to_cuda_datatype(m->output_type[0]);
#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
cudaDataType_t compute_type = cublas_data_type;
#else
// For best performance, set the default cublas compute type to
// CUBLAS_COMPUTE_16F for half precision and to
// CUBLAS_COMPUTE_32F_FAST_16F for full precision
cublasComputeType_t compute_type = CUBLAS_COMPUTE_16F;
if (m->output_type[0] == DT_FLOAT) {
compute_type = CUBLAS_COMPUTE_32F_FAST_16F;
}
#endif

// int device;
// checkCUDA(cudaGetDevice(&device));
// cudaEvent_t t_start, t_end;
Expand Down Expand Up @@ -779,13 +767,6 @@ void compute_o_prod_bias(IncMultiHeadSelfAttentionMeta const *m,
cudaDataType_t cublas_data_type = ff_to_cuda_datatype(m->output_type[0]);
cudnnDataType_t cudnn_data_type = ff_to_cudnn_datatype(m->output_type[0]);
assert(data_type_size(m->output_type[0]) == sizeof(DT));
#if CUDA_VERSION >= 11000
// TODO: currently set the default to CUBLAS_COMPUTE_16F for best
// performance
cublasComputeType_t compute_type = CUBLAS_COMPUTE_16F;
#else
cudaDataType_t compute_type = cublas_data_type;
#endif
// Project to output, save result directly on output tensor
{
DT alpha = 1.0f, beta = 0.0f;
Expand Down

0 comments on commit 115a3ff

Please sign in to comment.