Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhangYuanhan-AI committed Dec 21, 2023
1 parent 656db89 commit 3146598
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 37 deletions.
175 changes: 175 additions & 0 deletions eval_results
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
================================================================================
EVALUATION REPORT
================================================================================


MODEL INFO: {'name': 'otter_image', 'model_path': '/mnt/petrelfs/zhangyuanhan/Otter/checkpoints/otter_llava_sft_nonconv_nogroup'}
--------------------------------------------------------------------------------
[2023-12-20 17:11:37,449] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Imported class: <class 'pipeline.benchmarks.models.otter_image.OtterImage'>
The current model version is configured for Otter-Image with max_num_frames set to None.
Parameter: lang_encoder.model.embed_tokens.weight, Size: 131.084288 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.3.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.7.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.11.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.15.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.19.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.23.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.27.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.ff_gate, Size: 0.000001 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.norm.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.norm.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.to_q.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.to_kv.weight, Size: 1.048576 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.attn.to_out.weight, Size: 2.097152 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.0.weight, Size: 0.004096 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.0.bias, Size: 0.004096 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.1.weight, Size: 67.108864 M
Parameter: lang_encoder.model.layers.31.gated_cross_attn_layer.feed_forward.3.weight, Size: 67.108864 M
Parameter: lang_encoder.lm_head.weight, Size: 131.084288 M
Parameter: perceiver.latents, Size: 0.065536 M
Parameter: perceiver.layers.0.norm_media.weight, Size: 0.001024 M
Parameter: perceiver.layers.0.norm_media.bias, Size: 0.001024 M
Parameter: perceiver.layers.0.norm_latents.weight, Size: 0.001024 M
Parameter: perceiver.layers.0.norm_latents.bias, Size: 0.001024 M
Parameter: perceiver.layers.0.to_q.weight, Size: 0.524288 M
Parameter: perceiver.layers.0.to_kv.weight, Size: 1.048576 M
Parameter: perceiver.layers.0.to_out.weight, Size: 0.524288 M
Parameter: perceiver.layers.0.feed_forward.0.weight, Size: 0.001024 M
Parameter: perceiver.layers.0.feed_forward.0.bias, Size: 0.001024 M
Parameter: perceiver.layers.0.feed_forward.1.weight, Size: 4.194304 M
Parameter: perceiver.layers.0.feed_forward.3.weight, Size: 4.194304 M
Parameter: perceiver.layers.1.norm_media.weight, Size: 0.001024 M
Parameter: perceiver.layers.1.norm_media.bias, Size: 0.001024 M
Parameter: perceiver.layers.1.norm_latents.weight, Size: 0.001024 M
Parameter: perceiver.layers.1.norm_latents.bias, Size: 0.001024 M
Parameter: perceiver.layers.1.to_q.weight, Size: 0.524288 M
Parameter: perceiver.layers.1.to_kv.weight, Size: 1.048576 M
Parameter: perceiver.layers.1.to_out.weight, Size: 0.524288 M
Parameter: perceiver.layers.1.feed_forward.0.weight, Size: 0.001024 M
Parameter: perceiver.layers.1.feed_forward.0.bias, Size: 0.001024 M
Parameter: perceiver.layers.1.feed_forward.1.weight, Size: 4.194304 M
Parameter: perceiver.layers.1.feed_forward.3.weight, Size: 4.194304 M
Parameter: perceiver.layers.2.norm_media.weight, Size: 0.001024 M
Parameter: perceiver.layers.2.norm_media.bias, Size: 0.001024 M
Parameter: perceiver.layers.2.norm_latents.weight, Size: 0.001024 M
Parameter: perceiver.layers.2.norm_latents.bias, Size: 0.001024 M
Parameter: perceiver.layers.2.to_q.weight, Size: 0.524288 M
Parameter: perceiver.layers.2.to_kv.weight, Size: 1.048576 M
Parameter: perceiver.layers.2.to_out.weight, Size: 0.524288 M
Parameter: perceiver.layers.2.feed_forward.0.weight, Size: 0.001024 M
Parameter: perceiver.layers.2.feed_forward.0.bias, Size: 0.001024 M
Parameter: perceiver.layers.2.feed_forward.1.weight, Size: 4.194304 M
Parameter: perceiver.layers.2.feed_forward.3.weight, Size: 4.194304 M
Parameter: perceiver.layers.3.norm_media.weight, Size: 0.001024 M
Parameter: perceiver.layers.3.norm_media.bias, Size: 0.001024 M
Parameter: perceiver.layers.3.norm_latents.weight, Size: 0.001024 M
Parameter: perceiver.layers.3.norm_latents.bias, Size: 0.001024 M
Parameter: perceiver.layers.3.to_q.weight, Size: 0.524288 M
Parameter: perceiver.layers.3.to_kv.weight, Size: 1.048576 M
Parameter: perceiver.layers.3.to_out.weight, Size: 0.524288 M
Parameter: perceiver.layers.3.feed_forward.0.weight, Size: 0.001024 M
Parameter: perceiver.layers.3.feed_forward.0.bias, Size: 0.001024 M
Parameter: perceiver.layers.3.feed_forward.1.weight, Size: 4.194304 M
Parameter: perceiver.layers.3.feed_forward.3.weight, Size: 4.194304 M
Parameter: perceiver.layers.4.norm_media.weight, Size: 0.001024 M
Parameter: perceiver.layers.4.norm_media.bias, Size: 0.001024 M
Parameter: perceiver.layers.4.norm_latents.weight, Size: 0.001024 M
Parameter: perceiver.layers.4.norm_latents.bias, Size: 0.001024 M
Parameter: perceiver.layers.4.to_q.weight, Size: 0.524288 M
Parameter: perceiver.layers.4.to_kv.weight, Size: 1.048576 M
Parameter: perceiver.layers.4.to_out.weight, Size: 0.524288 M
Parameter: perceiver.layers.4.feed_forward.0.weight, Size: 0.001024 M
Parameter: perceiver.layers.4.feed_forward.0.bias, Size: 0.001024 M
Parameter: perceiver.layers.4.feed_forward.1.weight, Size: 4.194304 M
Parameter: perceiver.layers.4.feed_forward.3.weight, Size: 4.194304 M
Parameter: perceiver.layers.5.norm_media.weight, Size: 0.001024 M
Parameter: perceiver.layers.5.norm_media.bias, Size: 0.001024 M
Parameter: perceiver.layers.5.norm_latents.weight, Size: 0.001024 M
Parameter: perceiver.layers.5.norm_latents.bias, Size: 0.001024 M
Parameter: perceiver.layers.5.to_q.weight, Size: 0.524288 M
Parameter: perceiver.layers.5.to_kv.weight, Size: 1.048576 M
Parameter: perceiver.layers.5.to_out.weight, Size: 0.524288 M
Parameter: perceiver.layers.5.feed_forward.0.weight, Size: 0.001024 M
Parameter: perceiver.layers.5.feed_forward.0.bias, Size: 0.001024 M
Parameter: perceiver.layers.5.feed_forward.1.weight, Size: 4.194304 M
Parameter: perceiver.layers.5.feed_forward.3.weight, Size: 4.194304 M
Parameter: perceiver.norm.weight, Size: 0.001024 M
Parameter: perceiver.norm.bias, Size: 0.001024 M
Total Trainable param: 1.441004 B
Imported class: <class 'pipeline.benchmarks.datasets.mme.MMEDataset'>

DATASET: MMEDataset
--------------------
=========== Cognition ===========
2 changes: 1 addition & 1 deletion pipeline/accelerate_configs/accelerate_config_zero2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ main_process_port: 29501
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 1
num_processes: 8
use_cpu: false
1 change: 1 addition & 0 deletions pipeline/benchmarks/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def flush(self):
model_infos = [{"name": name} for name in model_names]
dataset_infos = [{"name": dataset_name, "cache_dir": phrased_args.cache_dir} for dataset_name in phrased_args.datasets.split(",")]

# import pdb;pdb.set_trace()
if not os.path.exists(os.path.dirname(phrased_args.output)):
os.makedirs(os.path.dirname(phrased_args.output))

Expand Down
7 changes: 5 additions & 2 deletions pipeline/mimicit_utils/mimicit_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,8 @@ def merge(key, pad_idx, pading_size=None):
try:
if samples[0].get("patch_images", None) is not None:
batch["net_input"]["patch_images"] = torch.stack([sample["patch_images"] for sample in samples], dim=0)
else:
import pdb;pdb.set_trace()
except Exception as e:
print(f"Error: {e}")
print(batch["id"])
Expand Down Expand Up @@ -709,7 +711,7 @@ def preload_dataset(path):
args.tokenizer = text_tokenizer

dataset_info = preload_dataset("/mnt/petrelfs/zhangyuanhan/Otter/shared_scripts/llava_sft_noconv_nogrounp.yaml")
dataset = MimicitDataset(args, dataset_info["TEXT_ONLY"], "TEXT_ONLY")
dataset = MimicitDataset(args, dataset_info["IMAGE_TEXT"], "IMAGE_TEXT")
sampler = RandomSampler(dataset, replacement=True, num_samples=len(dataset))
# sampler = DistributedProxySampler(sampler, num_replicas=8, rank=7)
# import pdb;pdb.set_trace()
Expand All @@ -728,7 +730,8 @@ def preload_dataset(path):
cycle_data = cycle(dataloader)
while True:
_ = next(cycle_data)
net_input = _.pop("net_input")
print(len(_["net_input"]["patch_images"]))
# net_input = _.pop("net_input")
# for _ in cycle(dataloader):
# pass
# print(_["net_input"])
Expand Down
46 changes: 23 additions & 23 deletions pipeline/train/instruction_following.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def get_dataloader_from_sequence(sequence, current_step):
global_step = num_steps + epoch * num_batches_per_epoch
# dataloader_iterator = get_next_dataloader(dataloader_iterators, weights)
dataloader_iterator = get_dataloader_from_sequence(dataloader_sequence, num_steps)
import pdb;pdb.set_trace()
# import pdb;pdb.set_trace()
batch_mimicit = next(dataloader_iterator) # Fetch a batch from the chosen dataloader

if args.model_name != "fuyu": # design fuyu's process into it's processor, a way better design than following code.
Expand All @@ -174,7 +174,7 @@ def get_dataloader_from_sequence(sequence, current_step):
master_print(e)
# print("batch_mimicit",batch_mimicit)
# print("dataloader_iterator":dataloader_iterator)
import pdb;pdb.set_trace()
# import pdb;pdb.set_trace()
continue
# pass
# import pdb;pdb.set_trace()
Expand Down Expand Up @@ -210,11 +210,11 @@ def masking(masking_number: int = -100):
return labels
labels = masking()

if args.remove_answer_token:
input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, answer_token_id, tokenizer) # find and remove certain tokens from input_ids, labels, and attention_mask
# if args.remove_answer_token:
# input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, answer_token_id, tokenizer) # find and remove certain tokens from input_ids, labels, and attention_mask

if args.remove_eos_token:
input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, endofchunk_token_id, tokenizer)
# if args.remove_eos_token:
# input_ids, labels, attention_mask = find_and_remove_tokens(input_ids, labels, attention_mask, endofchunk_token_id, tokenizer)

# put the processed content back into batch_mimicit
batch_mimicit["input_ids"] = input_ids
Expand Down Expand Up @@ -501,14 +501,14 @@ def main():
model.train()
# Main Training Loop
for epoch in range(resume_from_epoch, args.num_epochs):
save_hf_weights(
model,
args,
accelerator,
processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
epoch=epoch + 1,
)
# save_hf_weights(
# model,
# args,
# accelerator,
# processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
# tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
# epoch=epoch + 1,
# )
train_one_epoch(
args=args,
model=model,
Expand All @@ -535,15 +535,15 @@ def main():
master_print(f"Saved checkpoint at epoch {epoch+1}.")
accelerator.wait_for_everyone()

# Save the final weights
save_hf_weights(
model,
args,
accelerator,
processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
)
accelerator.wait_for_everyone()
# # Save the final weights
# save_hf_weights(
# model,
# args,
# accelerator,
# processor=processor if "idefics" in args.model_name.lower() or "fuyu" in args.model_name.lower() else None,
# tokenizer=tokenizer if "llama2" in args.model_name.lower() else None,
# )
# accelerator.wait_for_everyone()


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 3146598

Please sign in to comment.