Skip to content

Commit

Permalink
Merge branch 'inference' into inference
Browse files (browse the repository at this point in the history)
  • Loading branch information
stelleg authored Aug 8, 2024
2 parents c33cc14 + 6a1a188 commit 9a34e9e
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ __global__ void apply_proj_bias_qkv(DT *input_ptr,
int num_heads,
int num_kv_heads,
bool scaling_query,
- float scaling_factor);
+ float scaling_factor,
+ int hidden_size);

#if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
template <typename DT>
Expand Down
2 changes: 1 addition & 1 deletion src/ops/attention.cu
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ MultiHeadAttentionMeta::MultiHeadAttentionMeta(FFHandler handler,
checkCUDNN(cudnnCreateSeqDataDescriptor(&oDesc));
// Currently do not support adding bias to key/value projection
assert(!attn->add_bias_kv);
- cudnnAttnQueryMap_t attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE;
+ unsigned attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE;
// Assume no beam search for now
int maxBeamSize = 1;
// printf("batchSize(%d) qSize(%d) kSize(%d) vSize(%d) qProjSize(%d)
Expand Down

0 comments on commit 9a34e9e

Please sign in to comment.