diff --git a/eval_results b/eval_results
new file mode 100644
index 00000000..9d65d82f
--- /dev/null
+++ b/eval_results
@@ -0,0 +1,175 @@
+================================================================================
+                               EVALUATION REPORT
+================================================================================
+
+
+MODEL INFO: {'name': 'otter_image', 'model_path': '/mnt/petrelfs/zhangyuanhan/Otter/checkpoints/otter_llava_sft_nonconv_nogroup'}
+--------------------------------------------------------------------------------
+[2023-12-20 17:11:37,449] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+Imported class:
+The current model version is configured for Otter-Image with max_num_frames set to None.
+Parameter: lang_encoder.model.embed_tokens.weight, Size: 131.084288 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
+Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
+Parameter: lang_encoder.lm_head.weight, Size: 131.084288 M
+Parameter: perceiver.latents, Size: 0.065536 M
+Parameter: perceiver.layers.0.norm_media.weight, Size: 0.001024 M
+Parameter: perceiver.layers.0.norm_media.bias, Size: 0.001024 M
+Parameter: perceiver.layers.0.norm_latents.weight, Size: 0.001024 M
+Parameter: perceiver.layers.0.norm_latents.bias, Size: 0.001024 M
+Parameter: perceiver.layers.0.to_q.weight, Size: 0.524288 M
+Parameter: perceiver.layers.0.to_kv.weight, Size: 1.048576 M
+Parameter: perceiver.layers.0.to_out.weight, Size: 0.524288 M
+Parameter: perceiver.layers.0.feed_forward.0.weight, Size: 0.001024 M
+Parameter: perceiver.layers.0.feed_forward.0.bias, Size: 0.001024 M
+Parameter: perceiver.layers.0.feed_forward.1.weight, Size: 4.194304 M
+Parameter: perceiver.layers.0.feed_forward.3.weight, Size: 4.194304 M
+Parameter: perceiver.layers.1.norm_media.weight, Size: 0.001024 M
+Parameter: perceiver.layers.1.norm_media.bias, Size: 0.001024 M
+Parameter: perceiver.layers.1.norm_latents.weight, Size: 0.001024 M
+Parameter: perceiver.layers.1.norm_latents.bias, Size: 0.001024 M
+Parameter: perceiver.layers.1.to_q.weight, Size: 0.524288 M
+Parameter: perceiver.layers.1.to_kv.weight, Size: 1.048576 M
+Parameter: perceiver.layers.1.to_out.weight, Size: 0.524288 M
+Parameter: perceiver.layers.1.feed_forward.0.weight, Size: 0.001024 M
+Parameter: perceiver.layers.1.feed_forward.0.bias, Size: 0.001024 M
+Parameter: perceiver.layers.1.feed_forward.1.weight, Size: 4.194304 M
+Parameter: perceiver.layers.1.feed_forward.3.weight, Size: 4.194304 M
+Parameter: perceiver.layers.2.norm_media.weight, Size: 0.001024 M
+Parameter: perceiver.layers.2.norm_media.bias, Size: 0.001024 M
+Parameter: perceiver.layers.2.norm_latents.weight, Size: 0.001024 M
+Parameter: perceiver.layers.2.norm_latents.bias, Size: 0.001024 M
+Parameter: perceiver.layers.2.to_q.weight, Size: 0.524288 M
+Parameter: perceiver.layers.2.to_kv.weight, Size: 1.048576 M
+Parameter: perceiver.layers.2.to_out.weight, Size: 0.524288 M
+Parameter: perceiver.layers.2.feed_forward.0.weight, Size: 0.001024 M
+Parameter: perceiver.layers.2.feed_forward.0.bias, Size: 0.001024 M
+Parameter: perceiver.layers.2.feed_forward.1.weight, Size: 4.194304 M
+Parameter: perceiver.layers.2.feed_forward.3.weight, Size: 4.194304 M
+Parameter: perceiver.layers.3.norm_media.weight, Size: 0.001024 M
+Parameter: perceiver.layers.3.norm_media.bias, Size: 0.001024 M
+Parameter: perceiver.layers.3.norm_latents.weight, Size: 0.001024 M
+Parameter: perceiver.layers.3.norm_latents.bias, Size: 0.001024 M
+Parameter: perceiver.layers.3.to_q.weight, Size: 0.524288 M
+Parameter: perceiver.layers.3.to_kv.weight, Size: 1.048576 M
+Parameter: perceiver.layers.3.to_out.weight, Size: 0.524288 M
+Parameter: perceiver.layers.3.feed_forward.0.weight, Size: 0.001024 M
+Parameter: perceiver.layers.3.feed_forward.0.bias, Size: 0.001024 M
+Parameter: perceiver.layers.3.feed_forward.1.weight, Size: 4.194304 M
+Parameter: perceiver.layers.3.feed_forward.3.weight, Size: 4.194304 M
+Parameter: perceiver.layers.4.norm_media.weight, Size: 0.001024 M
+Parameter: perceiver.layers.4.norm_media.bias, Size: 0.001024 M
+Parameter: perceiver.layers.4.norm_latents.weight, Size: 0.001024 M
+Parameter: perceiver.layers.4.norm_latents.bias, Size: 0.001024 M
+Parameter: perceiver.layers.4.to_q.weight, Size: 0.524288 M
+Parameter: perceiver.layers.4.to_kv.weight, Size: 1.048576 M
+Parameter: perceiver.layers.4.to_out.weight, Size: 0.524288 M
+Parameter: perceiver.layers.4.feed_forward.0.weight, Size: 0.001024 M
+Parameter: perceiver.layers.4.feed_forward.0.bias, Size: 0.001024 M
+Parameter: perceiver.layers.4.feed_forward.1.weight, Size: 4.194304 M
+Parameter: perceiver.layers.4.feed_forward.3.weight, Size: 4.194304 M
+Parameter: perceiver.layers.5.norm_media.weight, Size: 0.001024 M
+Parameter: perceiver.layers.5.norm_media.bias, Size: 0.001024 M
+Parameter: perceiver.layers.5.norm_latents.weight, Size: 0.001024 M
+Parameter: perceiver.layers.5.norm_latents.bias, Size: 0.001024 M
+Parameter: perceiver.layers.5.to_q.weight, Size: 0.524288 M
+Parameter: perceiver.layers.5.to_kv.weight, Size: 1.048576 M
+Parameter: perceiver.layers.5.to_out.weight, Size: 0.524288 M
+Parameter: perceiver.layers.5.feed_forward.0.weight, Size: 0.001024 M
+Parameter: perceiver.layers.5.feed_forward.0.bias, Size: 0.001024 M
+Parameter: perceiver.layers.5.feed_forward.1.weight, Size: 4.194304 M
+Parameter: perceiver.layers.5.feed_forward.3.weight, Size: 4.194304 M
+Parameter: perceiver.norm.weight, Size: 0.001024 M
+Parameter: perceiver.norm.bias, Size: 0.001024 M
+Total Trainable param: 1.441004 B
+Imported class:
+
+DATASET: MMEDataset
+--------------------
+=========== Cognition ===========
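The reported total can be cross-checked against the per-module sizes in the dump: two 131.084288 M embedding/output matrices, eight gated cross-attention blocks of roughly 139.477 M each, and a six-layer perceiver resampler of about 63.019 M. A quick sanity check in Python (all sizes copied from the report above):

    # Per-module parameter counts in millions, taken from the eval_results dump.
    embed_and_head = 131.084288 * 2                      # embed_tokens + lm_head
    xattn_block = (2 * 0.000001                          # attn_gate + ff_gate
                   + 2 * 0.004096                        # attn.norm weight + bias
                   + 2.097152 + 1.048576 + 2.097152      # to_q, to_kv, to_out
                   + 2 * 0.004096                        # feed_forward.0 weight + bias
                   + 2 * 67.108864)                      # feed_forward.1 + feed_forward.3
    perceiver_block = (4 * 0.001024                      # norm_media / norm_latents
                       + 0.524288 + 1.048576 + 0.524288  # to_q, to_kv, to_out
                       + 2 * 0.001024                    # feed_forward.0 weight + bias
                       + 2 * 4.194304)                   # feed_forward.1 + feed_forward.3
    perceiver = 0.065536 + 6 * perceiver_block + 2 * 0.001024  # latents + layers + final norm
    total_m = embed_and_head + 8 * xattn_block + perceiver
    print(f"{total_m / 1000:.6f} B")  # 1.441004 B, matching the report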
diff --git a/pipeline/accelerate_configs/accelerate_config_zero2.yaml b/pipeline/accelerate_configs/accelerate_config_zero2.yaml
index 10b8c3cb..c3f07907 100755
--- a/pipeline/accelerate_configs/accelerate_config_zero2.yaml
+++ b/pipeline/accelerate_configs/accelerate_config_zero2.yaml
@@ -14,5 +14,5 @@ main_process_port: 29501
 main_training_function: main
 mixed_precision: bf16
 num_machines: 1
-num_processes: 1
+num_processes: 8
 use_cpu: false
\ No newline at end of file
diff --git a/pipeline/benchmarks/evaluate.py b/pipeline/benchmarks/evaluate.py
index 3097e2b8..4c233b73 100644
--- a/pipeline/benchmarks/evaluate.py
+++ b/pipeline/benchmarks/evaluate.py
@@ -108,6 +108,7 @@ def flush(self):
 model_infos = [{"name": name} for name in model_names]
 dataset_infos = [{"name": dataset_name, "cache_dir": phrased_args.cache_dir} for dataset_name in phrased_args.datasets.split(",")]
 
+# import pdb;pdb.set_trace()
 if not os.path.exists(os.path.dirname(phrased_args.output)):
     os.makedirs(os.path.dirname(phrased_args.output))
 
diff --git a/pipeline/mimicit_utils/mimicit_dataset.py b/pipeline/mimicit_utils/mimicit_dataset.py
index 91711f35..952d1191 100755
--- a/pipeline/mimicit_utils/mimicit_dataset.py
+++ b/pipeline/mimicit_utils/mimicit_dataset.py
@@ -595,6 +595,8 @@ def merge(key, pad_idx, pading_size=None):
         try:
             if samples[0].get("patch_images", None) is not None:
                 batch["net_input"]["patch_images"] = torch.stack([sample["patch_images"] for sample in samples], dim=0)
+            else:
+                import pdb;pdb.set_trace()
         except Exception as e:
             print(f"Error: {e}")
             print(batch["id"])
@@ -709,7 +711,7 @@ def preload_dataset(path):
     args.tokenizer = text_tokenizer
 
     dataset_info = preload_dataset("/mnt/petrelfs/zhangyuanhan/Otter/shared_scripts/llava_sft_noconv_nogrounp.yaml")
-    dataset = MimicitDataset(args, dataset_info["TEXT_ONLY"], "TEXT_ONLY")
+    dataset = MimicitDataset(args, dataset_info["IMAGE_TEXT"], "IMAGE_TEXT")
     sampler = RandomSampler(dataset, replacement=True, num_samples=len(dataset))
     # sampler = DistributedProxySampler(sampler, num_replicas=8, rank=7)
     # import pdb;pdb.set_trace()
@@ -728,7 +730,8 @@ def preload_dataset(path):
     cycle_data = cycle(dataloader)
     while True:
         _ = next(cycle_data)
-        net_input = _.pop("net_input")
+        print(len(_["net_input"]["patch_images"]))
+        # net_input = _.pop("net_input")
         # for _ in cycle(dataloader):
         #     pass
         # print(_["net_input"])
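The new `else: import pdb;pdb.set_trace()` branch in the collater above is a tripwire for batches that arrive without `patch_images`, i.e. TEXT_ONLY samples leaking into an IMAGE_TEXT run, and the `__main__` changes exercise it by loading the IMAGE_TEXT group and printing each batch's image count. For non-interactive multi-worker runs, an assertion-style guard may be preferable to a breakpoint; a minimal sketch, not the repo's code, with the sample keys mirrored from the diff:

    import torch

    def stack_patch_images(samples):
        # Every IMAGE_TEXT sample is expected to carry a "patch_images" tensor;
        # a missing key means a TEXT_ONLY sample was routed into this collater.
        missing = [i for i, s in enumerate(samples) if s.get("patch_images") is None]
        if missing:
            # Fail fast with context instead of parking a training worker in pdb.
            raise ValueError(f"samples {missing} lack patch_images; check the dataset group")
        return torch.stack([s["patch_images"] for s in samples], dim=0)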
diff --git a/pipeline/train/instruction_following.py b/pipeline/train/instruction_following.py
index a99cbb64..7b2b6f9b 100755
--- a/pipeline/train/instruction_following.py
+++ b/pipeline/train/instruction_following.py
@@ -159,7 +159,7 @@ def get_dataloader_from_sequence(sequence, current_step):
         global_step = num_steps + epoch * num_batches_per_epoch
         # dataloader_iterator = get_next_dataloader(dataloader_iterators, weights)
         dataloader_iterator = get_dataloader_from_sequence(dataloader_sequence, num_steps)
-        import pdb;pdb.set_trace()
+        # import pdb;pdb.set_trace()
        batch_mimicit = next(dataloader_iterator)  # Fetch a batch from the chosen dataloader
 
         if args.model_name != "fuyu":  # design fuyu's process into it's processor, a way better design than following code.
@@ -174,7 +174,7 @@ def get_dataloader_from_sequence(sequence, current_step):
             master_print(e)
             # print("batch_mimicit",batch_mimicit)
             # print("dataloader_iterator":dataloader_iterator)
-            import pdb;pdb.set_trace()
+            # import pdb;pdb.set_trace()
             continue
             # pass
         # import pdb;pdb.set_trace()
@@ -210,11 +210,11 @@ def masking(masking_number: int = -100):
                return labels
 
            labels = masking()
-           if args.remove_answer_token:
-               input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, answer_token_id, tokenizer)  # find and remove certain tokens from input_ids, labels, and attention_mask
+           # if args.remove_answer_token:
+           #     input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, answer_token_id, tokenizer)  # find and remove certain tokens from input_ids, labels, and attention_mask
 
-           if args.remove_eos_token:
-               input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, endofchunk_token_id, tokenizer)
+           # if args.remove_eos_token:
+           #     input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, endofchunk_token_id, tokenizer)
 
            # put the processed content back into batch_mimicit
            batch_mimicit["input_ids"] = input_ids
@@ -501,14 +501,14 @@ def main():
     model.train()
     # Main Training Loop
     for epoch in range(resume_from_epoch, args.num_epochs):
-        save_hf_weights(
-            model,
-            args,
-            accelerator,
-            processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
-            tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
-            epoch=epoch + 1,
-        )
+        # save_hf_weights(
+        #     model,
+        #     args,
+        #     accelerator,
+        #     processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
+        #     tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
+        #     epoch=epoch + 1,
+        # )
         train_one_epoch(
             args=args,
             model=model,
@@ -535,15 +535,15 @@ def main():
             master_print(f"Saved checkpoint at epoch {epoch+1}.")
             accelerator.wait_for_everyone()
 
-    # Save the final weights
-    save_hf_weights(
-        model,
-        args,
-        accelerator,
-        processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
-        tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
-    )
-    accelerator.wait_for_everyone()
+    # # Save the final weights
+    # save_hf_weights(
+    #     model,
+    #     args,
+    #     accelerator,
+    #     processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
+    #     tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
+    # )
+    # accelerator.wait_for_everyone()
 
 
 if __name__ == "__main__":
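With the `remove_answer_token` / `remove_eos_token` handling commented out, loss masking now rests entirely on `masking(masking_number=-100)`, which relies on the PyTorch convention that cross-entropy skips targets equal to `ignore_index` (-100 by default), so masked prompt tokens contribute nothing to the gradient. A self-contained illustration of that convention (toy values, not the training code):

    import torch
    import torch.nn.functional as F

    # Four token positions over a vocabulary of 10; the first two labels are
    # masked with -100 and skipped by cross_entropy's default ignore_index.
    logits = torch.randn(4, 10)
    labels = torch.tensor([-100, -100, 3, 7])
    loss = F.cross_entropy(logits, labels)  # averaged over the 2 supervised positions
    print(loss)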
diff --git a/shared_scripts/benchmark.yaml b/shared_scripts/benchmark.yaml
index 7771cec6..c6de0e6b 100644
--- a/shared_scripts/benchmark.yaml
+++ b/shared_scripts/benchmark.yaml
@@ -17,14 +17,16 @@ datasets:
   # - name: mmbench
   #   split: dev
   #   prompt: Answer with the option's letter from the given choices directly.
-  - name: pope
-    split: test
-    default_output_path: ./logs
+  # - name: pope
+  #   split: test
+  #   default_output_path: ./logs
   # - name: magnifierbench
   #   split: test
   #   prompt: Answer with the option's letter from the given choices directly.
   #   api_key: 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
 
 models:
-  - name: llava_model
-    model_path: /mnt/petrelfs/zhangyuanhan/LLaVA/checkpoints/llava-v1.5-7b
\ No newline at end of file
+  # - name: llava_model
+  #   model_path: /mnt/petrelfs/zhangyuanhan/LLaVA/checkpoints/llava-v1.5-7b
+  - name: otter_image
+    model_path: /mnt/petrelfs/zhangyuanhan/Otter/checkpoints/otter_llava_sft_nonconv_nogroup
\ No newline at end of file
diff --git a/shared_scripts/llava_sft_noconv_nogrounp.yaml b/shared_scripts/llava_sft_noconv_nogrounp.yaml
index 9c4ba4e1..286c0e96 100644
--- a/shared_scripts/llava_sft_noconv_nogrounp.yaml
+++ b/shared_scripts/llava_sft_noconv_nogrounp.yaml
@@ -3,30 +3,30 @@ IMAGE_TEXT:
     mimicit_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_COCO.json
     images_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_COCO_Images.parquet
     train_config_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_COCO_train.json
-    num_samples: 1
+    num_samples: -1
   LA_MIX_GQA:
     mimicit_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_GQA.json
     images_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_GQA_Images.parquet
     train_config_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_GQA_train.json
-    num_samples: 1
+    num_samples: -1
   LA_MIX_OCRQA:
     mimicit_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_OCR_VQA.json
     images_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_OCR_VQA_Images.parquet
     train_config_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_OCR_VQA_train.json
-    num_samples: 1
+    num_samples: -1
   LA_MIX_TEXTVQA:
     mimicit_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_TEXTVQA.json
     images_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_TEXTVQA_Images.parquet
     train_config_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_TEXTVQA_train.json
-    num_samples: 1
+    num_samples: -1
   LA_MIX_VG:
     mimicit_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_VG.json
    images_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_VG_Images.parquet
     train_config_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_VG_train.json
-    num_samples: 1
+    num_samples: -1
 
 TEXT_ONLY:
   LA_MIX_TEXT:
     mimicit_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_TEXT.json
     train_config_path: /mnt/petrelfs/share_data/zhangyuanhan/m3it/LA_MIX/LA_MIX_TEXT_train.json
-    num_samples: 1
\ No newline at end of file
+    num_samples: -1
\ No newline at end of file
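Flipping every `num_samples` from 1 to -1 turns this YAML from a one-example smoke-test configuration into a full-dataset run. Judging from the diff, -1 follows the usual "use the whole split" convention, though that semantics is assumed here rather than confirmed against MimicitDataset; a hypothetical reading:

    def resolve_num_samples(num_samples: int, dataset_len: int) -> int:
        # Assumed convention: -1 selects the entire split, while a positive
        # value caps it (num_samples: 1 was the earlier one-sample debug setting).
        return dataset_len if num_samples == -1 else min(num_samples, dataset_len)

    assert resolve_num_samples(-1, 50000) == 50000  # full training run
    assert resolve_num_samples(1, 50000) == 1       # previous debug setting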