Commit 89a6287: fix

goliaro committed Nov 29, 2024
1 parent 10e1596 commit 89a6287

Showing 14 changed files with 272 additions and 184 deletions.
2 changes: 1 addition & 1 deletion benchmarking/debug.sh
@@ -46,7 +46,7 @@ export CUDA_VISIBLE_DEVICES=1
gdb -ex run --args ./inference/peft/peft \
-ll:cpu 4 -ll:gpu $NGPUS -ll:util 4 \
-ll:fsize 20000 -ll:zsize 10000 \
--fusion \
--verbose \
-llm-model $MODEL_NAME \
-enable-peft -peft-model $PEFT_MODEL_NAME \
-finetuning-dataset /usr/FlexFlow/inference/prompt/peft_dataset.json \
1 change: 1 addition & 0 deletions include/flexflow/model.h
@@ -75,6 +75,7 @@ enum TaskIDs {
EMBED_INIT_TASK_ID,
EMBED_FWD_TASK_ID,
EMBED_INF_TASK_ID,
EMBED_PEFT_BWD_TASK_ID,
EMBED_BWD_TASK_ID,
GATHER_INIT_TASK_ID,
GATHER_FWD_TASK_ID,
4 changes: 4 additions & 0 deletions include/flexflow/ops/embedding.h
@@ -89,6 +89,10 @@ class Embedding : public Op {
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);
static bool peft_bwd_task(Legion::Task const *task,
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);
static void backward_task(Legion::Task const *task,
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
2 changes: 1 addition & 1 deletion include/flexflow/ops/fused.h
@@ -65,7 +65,7 @@ class FusedOp : public Op {
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);
static void peft_bwd_task(Legion::Task const *task,
static bool peft_bwd_task(Legion::Task const *task,
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);
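
Note on wiring: the new EMBED_PEFT_BWD_TASK_ID (model.h) and the void-to-bool change for FusedOp::peft_bwd_task (fused.h) both imply matching Legion task-variant registrations elsewhere in the tree. Below is a minimal sketch of what those registrations could look like, assuming FlexFlow's usual pre-registration pattern; the FUSEDOP_PEFT_BWD_TASK_ID name, the helper function, and the registrar options are illustrative assumptions, not lines from this commit.

#include "flexflow/model.h"
#include "flexflow/ops/embedding.h"
#include "flexflow/ops/fused.h"
using namespace Legion;
using namespace FlexFlow;

void register_peft_bwd_variants() {
  {
    // New task added by this commit: Embedding PEFT backward.
    TaskVariantRegistrar registrar(EMBED_PEFT_BWD_TASK_ID, "Embedding PEFT Backward");
    registrar.add_constraint(ProcessorConstraint(Processor::TOC_PROC));
    registrar.set_leaf();
    Runtime::preregister_task_variant<bool, Embedding::peft_bwd_task>(
        registrar, "Embedding PEFT Backward Task");
  }
  {
    // Existing fused task: the template return type must change to bool to
    // match the new signature of FusedOp::peft_bwd_task.
    TaskVariantRegistrar registrar(FUSEDOP_PEFT_BWD_TASK_ID /* assumed name */,
                                   "FusedOp PEFT Backward");
    registrar.add_constraint(ProcessorConstraint(Processor::TOC_PROC));
    registrar.set_leaf();
    Runtime::preregister_task_variant<bool, FusedOp::peft_bwd_task>(
        registrar, "FusedOp PEFT Backward Task");
  }
}
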
19 changes: 12 additions & 7 deletions include/flexflow/ops/kernels/softmax_kernels.h
@@ -5,14 +5,18 @@
#include "flexflow/fftype.h"
#include "flexflow/op_meta.h"
#include "flexflow/ops/softmax.h"
#include "flexflow/ffconst_utils.h"
#include "flexflow/utils/memory_allocator.h"

namespace FlexFlow {

class SoftmaxMeta : public OpMeta {
public:
SoftmaxMeta(FFHandler handle,
SoftmaxMeta(FFHandler handler,
Softmax const *softmax,
Legion::Domain const &input_domain);
Legion::Domain const &input_domain,
bool is_last_op,
MemoryAllocator &gpu_mem_allocator);
#if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
cudnnTensorDescriptor_t inputTensor;
cudnnTensorDescriptor_t outputTensor;
@@ -21,6 +25,10 @@ class SoftmaxMeta : public OpMeta {
miopenTensorDescriptor_t outputTensor;
#endif
int dim;
// PEFT related fields
Realm::RegionInstance reserveInst;
void *output_grad_ptr = nullptr;
size_t allocated_peft_buffer_size = 0;
};

namespace Kernels {
@@ -38,13 +46,11 @@ void inference_kernel_wrapper(SoftmaxMeta const *m,
BatchConfig const *bc,
bool is_last_op,
GenericTensorAccessorR const &input,
GenericTensorAccessorW const &output,
GenericTensorAccessorW const &output_grad);
GenericTensorAccessorW const &output);

void peft_bwd_kernel_wrapper(SoftmaxMeta const *m,
BatchConfig const *bc,
GenericTensorAccessorW const &input_grad,
GenericTensorAccessorR const &output_grad);
GenericTensorAccessorW const &input_grad);

namespace Internal {
template <typename DT>
@@ -72,7 +78,6 @@ template <typename DT>
void peft_bwd_kernel(SoftmaxMeta const *m,
BatchConfig const *bc,
DT *input_grad_ptr,
DT const *output_grad_ptr,
int num_classes,
ffStream_t stream);

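
The softmax kernel API above no longer receives a separate output_grad accessor: the PEFT gradient buffer is now owned by SoftmaxMeta (reserveInst / output_grad_ptr / allocated_peft_buffer_size) and sized through the MemoryAllocator passed to the constructor. Below is a minimal sketch of the intended dataflow, assuming a CUDA build, float activations, and a buffer already reserved into m->output_grad_ptr; the helper name and sizing are illustrative, not the commit's actual kernels.

#include <cassert>
#include <cuda_runtime.h>
#include "flexflow/batch_config.h"
#include "flexflow/ops/kernels/softmax_kernels.h"
using namespace FlexFlow;

// Forward side: when this softmax is the last op, stash the values needed by
// the PEFT backward pass into the meta-owned buffer instead of writing them
// to an extra output_grad region (the region fused.cc used to add).
void cache_peft_softmax_output(SoftmaxMeta const *m,
                               BatchConfig const *bc,
                               float const *output_ptr,
                               int num_classes,
                               cudaStream_t stream) {
  size_t num_peft_tokens = bc->num_finetuning_tokens();
  size_t bytes = num_peft_tokens * num_classes * sizeof(float);
  assert(bytes <= m->allocated_peft_buffer_size);
  cudaMemcpyAsync(m->output_grad_ptr, output_ptr, bytes,
                  cudaMemcpyDeviceToDevice, stream);
}

// Backward side: peft_bwd_kernel, whose output_grad_ptr parameter was removed
// above, can now read the cached values directly from m->output_grad_ptr.
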
50 changes: 48 additions & 2 deletions src/ops/embedding.cc
@@ -667,14 +667,60 @@ void Embedding::backward(FFModel const &ff) {
runtime->execute_index_space(ctx, launcher);
}

bool Embedding::peft_bwd_task(Task const *task,
std::vector<PhysicalRegion> const &regions,
Context ctx,
Runtime *runtime) {
EmbeddingMeta *m = *((EmbeddingMeta **)task->local_args);
assert(regions.size() == 2);
assert(task->regions.size() == 2);
BatchConfig const *bc = BatchConfig::from_future(task->futures[0]);
GenericTensorAccessorW grad_input = helperGetGenericTensorAccessorWO(
m->input_type[0], regions[0], task->regions[0], FID_DATA, ctx, runtime);
GenericTensorAccessorR grad_output = helperGetGenericTensorAccessorRO(
m->output_type[0], regions[1], task->regions[1], FID_DATA, ctx, runtime);
return true;
}

Legion::FutureMap
Embedding::peft_bwd(FFModel const &ff,
BatchConfigFuture const &bc,
std::vector<ParallelTensor> const &batch_inputs,
std::vector<ParallelTensor> const &batch_outputs,
MachineView const *mv) {
// nothing to do (backward function only updates weights)
return FutureMap();
ArgumentMap argmap;
Context ctx = ff.config.lg_ctx;
Runtime *runtime = ff.config.lg_hlr;
parallel_is = batch_outputs[0]->parallel_is;
MachineView const *view = mv ? mv : &batch_outputs[0]->machine_view;
set_argumentmap_for_inference(ff, argmap, batch_outputs[0]);
size_t machine_view_hash = view->hash();
/* std::cout << "Embedding op machine_view: " << *(MachineView const *)mv
<< std::endl; */
IndexLauncher launcher(EMBED_PEFT_BWD_TASK_ID,
parallel_is,
TaskArgument(nullptr, 0),
argmap,
Predicate::TRUE_PRED,
false /*must*/,
0 /*mapper_id*/,
machine_view_hash);
launcher.add_future(bc);
launcher.add_region_requirement(
RegionRequirement(batch_inputs[0]->part_grad,
0 /*projection id*/,
WRITE_ONLY,
EXCLUSIVE,
batch_inputs[0]->region_grad));
launcher.add_field(0, FID_DATA);
launcher.add_region_requirement(
RegionRequirement(batch_outputs[0]->part_grad,
0 /*projection id*/,
READ_ONLY,
EXCLUSIVE,
batch_outputs[0]->region_grad));
launcher.add_field(1, FID_DATA);
return runtime->execute_index_space(ctx, launcher);
}

void Embedding::backward_task(Task const *task,
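
Embedding::peft_bwd used to be a stub that returned an empty FutureMap; it now launches EMBED_PEFT_BWD_TASK_ID, and the task body only maps the two gradient regions and returns true (the embedding input holds integer token ids, so there is no input gradient to propagate). A brief caller-side sketch under standard Legion semantics; the surrounding variables are illustrative only.

// Sketch: launch the embedding PEFT backward pass and synchronize on it.
Legion::FutureMap fm =
    embedding->peft_bwd(ff, bc, batch_inputs, batch_outputs, nullptr /*mv*/);
fm.wait_all_results();  // block until every point task has completed
// Each point task now produces a bool that can be read back if needed, e.g.
//   bool did_work = fm.get_result<bool>(point);
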
16 changes: 1 addition & 15 deletions src/ops/fused.cc
@@ -695,20 +695,6 @@ FutureMap FusedOp::inference(FFModel const &ff,
launcher.add_field(offset + i, FID_DATA);
}
offset += numOutputs;
// add softmax output grad
if (operators[numOperators - 1]->op_type == OP_SOFTMAX) {
// printf("operator %i is last SOFTMAX! adding grad for output %i\n",
// numOperators - 1,
// numOutputs - 1);
assert(outputs[numOutputs - 1]->region != LogicalRegion::NO_REGION);
launcher.add_region_requirement(
RegionRequirement(batch_outputs[numOutputs - 1]->part_grad,
0 /*projection id*/,
WRITE_ONLY,
EXCLUSIVE,
batch_outputs[numOutputs - 1]->region_grad));
launcher.add_field(offset, FID_DATA);
}
return runtime->execute_index_space(ctx, launcher);
}

@@ -767,7 +753,7 @@ FutureMap FusedOp::peft_bwd(FFModel const &ff,
launcher.add_region_requirement(
RegionRequirement(batch_outputs[i]->part_grad,
0 /*projection id*/,
i == numOutputs - 1 ? READ_WRITE : WRITE_ONLY,
WRITE_ONLY,
EXCLUSIVE,
batch_outputs[i]->region_grad));
launcher.add_field(offset + i, FID_DATA);
3 changes: 2 additions & 1 deletion src/ops/fused.cpp
@@ -674,7 +674,7 @@ __host__ void
regions[...](I): weights
regions[...](O): outputs
*/
__host__ void FusedOp::peft_bwd_task(Task const *task,
__host__ bool FusedOp::peft_bwd_task(Task const *task,
std::vector<PhysicalRegion> const &regions,
Context ctx,
Runtime *runtime) {
@@ -1230,6 +1230,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
false);
}
}
return true;
}

/*
52 changes: 19 additions & 33 deletions src/ops/fused.cu
@@ -96,11 +96,7 @@ __host__ void

assert(metas->numOperators == fused->numOperators);
assert(regions.size() == task->regions.size());
bool softmax_grad_additional_region =
(fused->op_op_type[fused->numOperators - 1] == OP_SOFTMAX);
assert((int)regions.size() == fused->numInputs + fused->numWeights +
fused->numOutputs +
softmax_grad_additional_region);
assert((int)regions.size() == fused->numInputs + fused->numWeights + fused->numOutputs);
GenericTensorAccessorR input_accessor[MAX_NUM_INPUTS];
GenericTensorAccessorR weight_accessor[MAX_NUM_WEIGHTS];
GenericTensorAccessorW output_accessor[MAX_NUM_OUTPUTS];
@@ -600,23 +596,13 @@ __host__ void
assert(fused->op_num_outputs[op] == 1);
assert(my_input_accessor[0].domain.get_volume() ==
my_output_accessor[0].domain.get_volume());
if (op == fused->numOperators - 1) { // if this is the final operator
output_accessor[fused->numOutputs] = helperGetGenericTensorAccessorWO(
fused->output_data_types[fused->numOutputs - 1],
regions[roff],
task->regions[roff],
FID_DATA,
ctx,
runtime);
}
SoftmaxMeta *m = (SoftmaxMeta *)metas->meta[op];
Kernels::Softmax::inference_kernel_wrapper(
m,
bc,
(op == fused->numOperators - 1),
my_input_accessor[0],
my_output_accessor[0],
output_accessor[fused->numOutputs]);
my_output_accessor[0]);
break;
}
case OP_ALLREDUCE: {
@@ -680,7 +666,7 @@ __host__ void
regions[...](I): weights
regions[...](O): outputs
*/
__host__ void FusedOp::peft_bwd_task(Task const *task,
__host__ bool FusedOp::peft_bwd_task(Task const *task,
std::vector<PhysicalRegion> const &regions,
Context ctx,
Runtime *runtime) {
@@ -691,7 +677,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
BatchConfig const *bc = BatchConfig::from_future(task->futures[0]);
// Return if no active PEFT bwd tokens
if (bc->num_finetuning_tokens() == 0) {
return;
return false;
}

assert(metas->numOperators == fused->numOperators);
@@ -859,7 +845,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
LinearMeta *m = (LinearMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
assert(m->input_type[0] == my_input_grad_accessor[0].data_type);
assert(m->input_type[0] == my_output_grad_accessor[0].data_type);
@@ -891,7 +877,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
LoraLinearMeta *m = (LoraLinearMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
assert(m->input_type[0] == my_input_grad_accessor[0].data_type);
assert(m->output_type[0] == my_output_grad_accessor[0].data_type);
@@ -1012,7 +998,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
RMSNormMeta const *m = (RMSNormMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
Kernels::RMSNorm::peft_bwd_kernel_wrapper(m,
bc,
@@ -1028,7 +1014,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
ResidualRMSNormMeta const *m = (ResidualRMSNormMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
Kernels::ResidualRMSNorm::peft_bwd_kernel_wrapper(
m,
@@ -1047,7 +1033,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
(IncMultiHeadSelfAttentionMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
assert(fused->op_num_weights[op] == 0);
GenericTensorAccessorR biases;
@@ -1072,7 +1058,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
LayerNormMeta const *m = (LayerNormMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
if (m->elementwise_affine) {
assert(fused->op_num_weights[op] == 1 + (int)(m->use_bias));
@@ -1094,7 +1080,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
(ResidualLayerNormMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
if (m->use_two_residuals) {
assert(fused->op_num_inputs[op] == 3);
@@ -1133,7 +1119,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
(AddBiasResidualLayerNormMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
if (!m->elementwise_affine) {
assert(fused->op_num_weights[op] == 1); // attn bias
@@ -1163,7 +1149,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
SigmoidSiluMultiMeta const *m = (SigmoidSiluMultiMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
SigmoidSiluMulti::peft_bwd_kernel_wrapper(m,
bc,
@@ -1181,10 +1167,9 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
SoftmaxMeta *m = (SoftmaxMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
Kernels::Softmax::peft_bwd_kernel_wrapper(
m, bc, my_input_grad_accessor[0], my_output_grad_accessor[0]);
Kernels::Softmax::peft_bwd_kernel_wrapper(m, bc, my_input_grad_accessor[0]);
break;
}
case OP_ALLREDUCE: {
Expand All @@ -1193,7 +1178,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
AllReduceMeta const *m = (AllReduceMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
Kernels::AllReduce::peft_bwd_kernel_wrapper(
m, bc, my_input_grad_accessor[0], my_output_grad_accessor[0]);
@@ -1205,7 +1190,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
ParallelIdentityMeta const *m = (ParallelIdentityMeta *)metas->meta[op];
if (!bc->peft_bwd_applies_to_this_layer(
m->layer_guid.transformer_layer_id)) {
return;
return false;
}
runtime->concurrent_task_barrier(ctx);
Kernels::ParallelIdentity::peft_bwd_kernel_wrapper(
@@ -1242,7 +1227,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
int shard_id = task->index_point.point_data[0];
if (!bc->peft_bwd_applies_to_this_layer(
metas->meta[op]->layer_guid.transformer_layer_id)) {
return;
return false;
}
FusedOp::save_inference_tensors_to_file(metas->meta[op],
shard_id,
@@ -1253,6 +1238,7 @@ __host__ void FusedOp::peft_bwd_task(Task const *task,
false);
}
}
return true;
}

/*
(Diffs for the remaining 5 changed files are not shown.)
