Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement]: Draft PR for medimage insight embedding model finetune and adapter model training #3777

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
type: component
spec: spec.yaml
# Use the same two separate categories as the sibling component assets
# ("Foundational Models", "Finetune") instead of one fused category string,
# which would register a brand-new category in the catalog.
categories: ["Foundational Models", "Finetune"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
# NOTE(review): "medimgage" is a typo for "medimage". The pipeline spec
# references this exact component name, so rename both in one coordinated
# change rather than here alone.
name: medimgage_adapter_finetune
# NOTE(review): the pipeline spec pins medimgage_adapter_finetune:0.0.1.yesh1 —
# reconcile which version is intended before release.
version: 0.0.3
type: command

is_deterministic: true

display_name: Medical Image Adapter Finetune
description: Component to finetune the model using the medical image data

# TODO(review): environment is pinned to a personal subscription/workspace;
# replace with a curated/registry environment before publishing.
environment: azureml:/subscriptions/dbd697c3-ef40-488f-83e6-5ad4dfb78f9b/resourceGroups/rdondera/providers/Microsoft.MachineLearningServices/workspaces/validatr/environments/medimage-adapter-pt-ft/versions/13

code: ../../../src/finetune

distribution:
  type: pytorch

inputs:
  train_data_path:
    type: uri_file
    optional: false
    description: Path to the training data file.
    mode: ro_mount

  validation_data_path:
    type: uri_file
    optional: false
    description: Path to the validation data file.
    mode: ro_mount

  train_dataloader_batch_size:
    type: integer
    min: 1
    default: 8
    optional: true
    description: Batch size for the training dataloader.

  validation_dataloader_batch_size:
    type: integer
    min: 1
    default: 1
    optional: true
    description: Batch size for the validation dataloader.

  train_dataloader_workers:
    type: integer
    min: 0
    default: 2
    optional: true
    description: Number of workers for the training dataloader.

  validation_dataloader_workers:
    type: integer
    min: 0
    default: 2
    optional: true
    description: Number of workers for the validation dataloader.

  output_classes:
    type: integer
    min: 1
    default: 5
    optional: true
    description: Number of output classes.

  hidden_dimensions:
    type: integer
    min: 1
    default: 512
    optional: true
    description: Number of hidden dimensions.

  input_channels:
    type: integer
    min: 1
    default: 1024
    optional: true
    description: Number of input channels.

  learning_rate:
    type: number
    default: 0.0003
    optional: true
    description: Learning rate for training.

  max_epochs:
    type: integer
    min: 1
    default: 10
    optional: true
    description: Maximum number of epochs for training.

outputs:
  output_model_path:
    type: uri_folder
    description: Path to save the output model.
    mode: rw_mount

# Optional inputs are wrapped in $[[...]] so the flag is dropped entirely
# when the input is not bound.
command: >-
  python medimage_train.py
  --task_name "AdapterTrain"
  --train_data_path "${{inputs.train_data_path}}"
  --validation_data_path "${{inputs.validation_data_path}}"
  $[[--train_dataloader_batch_size "${{inputs.train_dataloader_batch_size}}"]]
  $[[--validation_dataloader_batch_size "${{inputs.validation_dataloader_batch_size}}"]]
  $[[--train_dataloader_workers "${{inputs.train_dataloader_workers}}"]]
  $[[--validation_dataloader_workers "${{inputs.validation_dataloader_workers}}"]]
  $[[--output_classes "${{inputs.output_classes}}"]]
  $[[--hidden_dimensions "${{inputs.hidden_dimensions}}"]]
  $[[--input_channels "${{inputs.input_channels}}"]]
  $[[--learning_rate "${{inputs.learning_rate}}"]]
  $[[--max_epochs "${{inputs.max_epochs}}"]]
  --output_model_path "${{outputs.output_model_path}}"

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Component asset registration: points the asset tooling at spec.yaml in this
# directory and files the component under the shared catalog categories.
type: component
spec: spec.yaml
categories: ["Foundational Models", "Finetune"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
name: medimage_insight_ft_pipeline
# NOTE(review): ".yesh5" looks like a developer-scratch version suffix;
# switch to a plain semantic version before release.
version: 0.0.1.yesh5
type: pipeline
display_name: Medical Image Insight Classification Adapter Pipeline
# Fixed: previous description was copy-pasted from the chat-completion
# pipeline and did not describe this component.
description: >-
  Pipeline component to generate embeddings for medical images using the
  MedImageInsight embedding model and finetune a classification adapter on
  top of those embeddings.

inputs:
  instance_type_preprocess:
    type: string
    optional: true
    default: Standard_d12_v2
    description: >-
      Instance type to be used for preprocess component in case of serverless
      compute, eg. standard_d12_v2. The parameter compute_preprocess must be
      set to 'serverless' for instance_type to be used
  instance_type_finetune:
    type: string
    optional: true
    default: Standard_nc24rs_v3
    description: >-
      Instance type to be used for finetune component in case of serverless
      compute, eg. standard_nc24rs_v3. The parameter compute_finetune must be
      set to 'serverless' for instance_type to be used
  compute_preprocess:
    type: string
    optional: true
    default: serverless
    description: >-
      compute to be used for preprocess eg. provide 'FT-Cluster' if your
      compute is named 'FT-Cluster'. Special characters like \ and ' are
      invalid in the parameter value. If compute cluster name is provided,
      instance_type field will be ignored and the respective cluster will be
      used
  compute_finetune:
    type: string
    optional: true
    default: serverless
    description: >-
      compute to be used for finetune eg. provide 'FT-Cluster' if your
      compute is named 'FT-Cluster'. Special characters like \ and ' are
      invalid in the parameter value. If compute cluster name is provided,
      instance_type field will be ignored and the respective cluster will be
      used
  mlflow_model_path:
    type: uri_folder
    optional: false
    description: Path to the MLflow model to be used.

  zeroshot_path:
    type: uri_file
    optional: false
    description: Path to the zeroshot data file.
    # NOTE(review): rw_mount on an input is unusual — confirm the pipeline
    # writes to this path; otherwise prefer ro_mount.
    mode: rw_mount

  test_train_split_csv_path:
    type: uri_file
    optional: false
    description: Path to the CSV file containing test-train split information.
    mode: rw_mount

  train_dataloader_batch_size:
    type: integer
    min: 1
    default: 8
    optional: true
    description: Batch size for the training dataloader.

  validation_dataloader_batch_size:
    type: integer
    min: 1
    default: 1
    optional: true
    description: Batch size for the validation dataloader.

  train_dataloader_workers:
    type: integer
    min: 0
    default: 2
    optional: true
    description: Number of workers for the training dataloader.

  validation_dataloader_workers:
    type: integer
    min: 0
    default: 2
    optional: true
    description: Number of workers for the validation dataloader.

  output_classes:
    type: integer
    min: 1
    default: 5
    optional: true
    description: Number of output classes.

  hidden_dimensions:
    type: integer
    min: 1
    default: 512
    optional: true
    description: Number of hidden dimensions.

  input_channels:
    type: integer
    min: 1
    default: 1024
    optional: true
    description: Number of input channels.

  learning_rate:
    type: number
    default: 0.0003
    optional: true
    description: Learning rate for training.

  max_epochs:
    type: integer
    min: 1
    default: 10
    optional: true
    description: Maximum number of epochs for training.

outputs:
  output_model_path:
    type: uri_folder
    description: Path to save the output model.
    mode: rw_mount

jobs:
  medical_image_embedding_datapreprocessing:
    type: command
    component: azureml:medical_image_embedding_datapreprocessing:0.0.1.yesh5
    # FIX: the preprocessing step must run on the preprocess compute (was
    # '${{parent.inputs.compute_finetune}}', which contradicted the
    # instance_type_preprocess binding below and wasted GPU compute).
    compute: '${{parent.inputs.compute_preprocess}}'
    resources:
      instance_type: '${{parent.inputs.instance_type_preprocess}}'
    inputs:
      mlflow_model_path: '${{parent.inputs.mlflow_model_path}}'
      zeroshot_path: '${{parent.inputs.zeroshot_path}}'
      test_train_split_csv_path: '${{parent.inputs.test_train_split_csv_path}}'
  medimgage_adapter_finetune:
    type: command
    # NOTE(review): the referenced component's spec declares version 0.0.3
    # and its name carries the "medimgage" typo — reconcile name/version here
    # and in the component spec together.
    component: azureml:medimgage_adapter_finetune:0.0.1.yesh1
    compute: '${{parent.inputs.compute_finetune}}'
    resources:
      instance_type: '${{parent.inputs.instance_type_finetune}}'
    inputs:
      # Consumes the train/validation pickles produced by the preprocessing job.
      train_data_path: '${{parent.jobs.medical_image_embedding_datapreprocessing.outputs.output_train_pkl}}'
      validation_data_path: '${{parent.jobs.medical_image_embedding_datapreprocessing.outputs.output_validation_pkl}}'
      train_dataloader_batch_size: '${{parent.inputs.train_dataloader_batch_size}}'
      validation_dataloader_batch_size: '${{parent.inputs.validation_dataloader_batch_size}}'
      train_dataloader_workers: '${{parent.inputs.train_dataloader_workers}}'
      validation_dataloader_workers: '${{parent.inputs.validation_dataloader_workers}}'
      output_classes: '${{parent.inputs.output_classes}}'
      hidden_dimensions: '${{parent.inputs.hidden_dimensions}}'
      input_channels: '${{parent.inputs.input_channels}}'
      learning_rate: '${{parent.inputs.learning_rate}}'
      max_epochs: '${{parent.inputs.max_epochs}}'
    outputs:
      output_model_path: '${{parent.outputs.output_model_path}}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Component asset registration: points the asset tooling at spec.yaml in this
# directory and files the component under the shared catalog categories.
type: component
spec: spec.yaml
categories: ["Foundational Models", "Embedding"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
name: medical_image_embedding_datapreprocessing
# NOTE(review): ".yesh5" looks like a developer-scratch version suffix;
# switch to a plain semantic version before release (the pipeline spec pins
# this exact version, so update both together).
version: 0.0.1.yesh5
type: command

is_deterministic: true

display_name: Embedding Generation for Medical Images
description: To generate embeddings for medical images. See [docs](https://aka.ms/azureml/components/medical_image_embedding_datapreprocessing) to learn more.

# TODO(review): environment is pinned to a personal subscription/workspace;
# replace with a curated/registry environment before publishing.
environment: azureml:/subscriptions/dbd697c3-ef40-488f-83e6-5ad4dfb78f9b/resourceGroups/rdondera/providers/Microsoft.MachineLearningServices/workspaces/validatr/environments/medimage-embedding-generation/versions/5

code: ../../../src/medimage_insight_adapter_preprocess

inputs:
  zeroshot_path:
    type: uri_file
    optional: false
    description: Path to the zeroshot data file.
    # NOTE(review): rw_mount on an input is unusual — confirm the script
    # writes to this path; otherwise prefer ro_mount.
    mode: rw_mount

  test_train_split_csv_path:
    type: uri_file
    optional: false
    description: Path to the CSV file containing test-train split information.
    mode: rw_mount

  mlflow_model_path:
    type: uri_folder
    optional: false
    description: Path to the MLflow model to be imported.
    mode: ro_mount

outputs:
  output_train_pkl:
    type: uri_folder
    description: Path to the output training PKL file.
    mode: rw_mount

  output_validation_pkl:
    type: uri_folder
    description: Path to the output validation PKL file.
    mode: rw_mount

# Folded (>-) so the invocation reads one argument per line but is passed to
# the runner as a single command line.
command: >-
  python medimage_datapreprocess.py
  --task_name "MedEmbedding"
  --zeroshot_path "${{inputs.zeroshot_path}}"
  --test_train_split_csv_path "${{inputs.test_train_split_csv_path}}"
  --output_train_pkl "${{outputs.output_train_pkl}}"
  --output_validation_pkl "${{outputs.output_validation_pkl}}"
  --mlflow_model_path "${{inputs.mlflow_model_path}}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Environment asset registration: spec.yaml holds the environment definition
# and environment.yaml holds the Docker build/publish configuration.
name: acft-medimageinsight-embedding
# "auto" — version is presumably assigned by the asset-publishing tooling at
# release time; confirm against the repo's asset conventions.
version: auto
type: environment
spec: spec.yaml
extra_config: environment.yaml
categories: ["PyTorch", "Training"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# PTCA image
FROM mcr.microsoft.com/aifx/acpt/stable-ubuntu2004-cu118-py310-torch222:biweekly.202409.3

USER root

# Install unzip. Run update and install in a single RUN layer so a cached
# (stale) package index is never paired with a newer install command, skip
# recommended packages, and drop the index files to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends unzip && \
    rm -rf /var/lib/apt/lists/*

# Install required packages from pypi
COPY requirements.txt .
RUN pip install -r requirements.txt --no-cache-dir
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Pinned dependencies for the medimage-insight embedding environment image.
azureml-acft-common-components==0.0.65
azureml-acft-contrib-hf-nlp==0.0.65
# NOTE(review): wheel installed from an ad-hoc blob-storage URL; consider
# publishing it to a package feed so the artifact is versioned and auditable.
https://automlcesdkdataresources.blob.core.windows.net/wheels/classification_demo-0.1.2-py3-none-any.whl
mlflow==2.14.3
cloudpickle==2.2.1
colorama==0.4.6
einops==0.8.0
ftfy==6.3.1
fvcore==0.1.5.post20221221
jinja2==3.1.5
mup==1.0.0
numpy==1.23.5
packaging==24.2
pandas==2.2.3
psutil==6.1.1
pyyaml==6.0.2
requests==2.32.3
scikit-learn==1.5.2
scipy==1.13.1
sentencepiece==0.2.0
tenacity==9.0.0
timm==1.0.13
tornado==6.4.2
SimpleITK~=2.4.0
# NOTE(review): transformers==4.16.2 (early 2022) is far older than every
# other pin here (torch 2.2.2 base image, timm 1.0.13) — confirm this exact
# old version is actually required.
transformers==4.16.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Docker build and publish configuration for the embedding environment image.
image:
  # Name under which the built image is published.
  name: azureml/curated/acft-medimageinsight-embedding
  os: linux
  # Build context: the Dockerfile and the files templated into the context.
  context:
    dir: context
    dockerfile: Dockerfile
    template_files:
      - Dockerfile
      - requirements.txt
  publish:
    location: mcr
    visibility: public
Loading
Loading