diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml
index 53c1ac5a1..4749750e4 100644
--- a/.github/workflows/workflow_finetune.yml
+++ b/.github/workflows/workflow_finetune.yml
@@ -19,7 +19,7 @@ jobs:
         model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b ]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
-
+
         exclude:
           - { isPR: true }
         include:
@@ -27,22 +27,36 @@ jobs:
           - { model: "meta-llama/Llama-2-7b-chat-hf"}

     runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+        - ${{ vars.ACTIONS_RUNNER_CONFIG_PATH }}:/root/actions-runner-config
+
     steps:
       - name: Checkout
         uses: actions/checkout@v2

       - name: Load environment variables
-        run: cat ~/llm-ray-actions-runner/.env >> $GITHUB_ENV
+        run: cat /root/actions-runner-config/.env >> $GITHUB_ENV

       - name: Build Docker Image
-        run: docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes | docker image prune
+        run: |
+          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes
+          docker image prune -f

       - name: Start Docker Container
         run: |
          cid=$(docker ps -q --filter "name=finetune")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="finetune" --hostname="finetune-container" finetune:latest
-
+          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="finetune" --hostname="finetune-container" finetune:latest
       - name: Run Finetune Test
         run: |
          docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
@@ -129,7 +143,6 @@ jobs:
            docker exec "finetune" python -c "$CMD"
            docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml"
          fi
-
       - name: Stop Ray
         run: |
          cid=$(docker ps -q --filter "name=finetune")
@@ -142,6 +155,5 @@ jobs:
         run: |
          cid=$(docker ps -q --filter "name=finetune")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-
       - name: Test Summary
         run: echo "to be continued"
diff --git a/.github/workflows/workflow_finetune_gpu.yml b/.github/workflows/workflow_finetune_gpu.yml
index e3adb7923..f18e4eaf5 100644
--- a/.github/workflows/workflow_finetune_gpu.yml
+++ b/.github/workflows/workflow_finetune_gpu.yml
@@ -10,16 +10,27 @@ jobs:
       matrix:
         model: [ pythia-6.9b, gpt-j-6b ]
     runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+
     steps:
       - name: Checkout
         uses: actions/checkout@v2

       - name: Running task on Intel GPU
         run: |
-          rm ~/borealis-runner/llm-ray.tar.gz -f
-          tar zcf ~/borealis-runner/llm-ray.tar.gz -C ~/actions-runner/_work/llm-ray .
+          rm ~/borealis-runner/llm-on-ray.tar.gz -f
+          tar zcf ~/borealis-runner/llm-on-ray.tar.gz -C ~/actions-runner/_work/llm-on-ray .
          cd ~/borealis-runner/
          python3 finetune_on_pvc.py --base_model "${{ matrix.model }}"
-
       - name: Test Summary
-        run: echo "to be continued"
+        run: echo "to be continued"
\ No newline at end of file
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index 154ad3158..555b650ca 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -16,7 +16,6 @@ jobs:
     name: inference test
     strategy:
       matrix:
-        # for mistral-7b-v0.1, we use bigdl-cpu to verify
         model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1 ]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
@@ -32,10 +31,22 @@ jobs:
            model: mpt-7b

     runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
+
       - name: Set Name Prefix
         id: "prefix"
         run: |
@@ -54,14 +65,15 @@ jobs:
            DF_SUFFIX=".cpu_and_deepspeed"
          fi
          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes | docker image prune
+          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes
+          docker image prune -f

       - name: Start Docker Container
         run: |
          PREFIX=${{steps.prefix.outputs.prefix}}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
+          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest

       - name: Start Ray Cluster
         run: |
@@ -137,4 +149,7 @@ jobs:
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi

       - name: Test Summary
-        run: echo "to be continued"
\ No newline at end of file
+        run: echo "to be continued"
+
+
+
diff --git a/.github/workflows/workflow_orders_nightly.yml b/.github/workflows/workflow_orders_nightly.yml
index ae7cf89f5..f80e6f825 100644
--- a/.github/workflows/workflow_orders_nightly.yml
+++ b/.github/workflows/workflow_orders_nightly.yml
@@ -1,8 +1,8 @@
-name: llm-ray inference & finetune
+name: llm-ray inference & finetune nightly

 on:
   schedule:
-    - cron: "0 19 * * *"
+    - cron: "0 16 * * *"

 jobs:

diff --git a/.github/workflows/workflow_orders_on_merge.yml b/.github/workflows/workflow_orders_on_merge.yml
new file mode 100644
index 000000000..56bda5006
--- /dev/null
+++ b/.github/workflows/workflow_orders_on_merge.yml
@@ -0,0 +1,24 @@
+name: llm-ray inference & finetune
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/**'
+      - 'docker/**'
+      - 'common/**'
+      - 'dev/docker/**'
+      - 'finetune/**'
+      - 'inference/**'
+      - 'rlhf/**'
+      - 'tools/**'
+      - 'pyproject.toml'
+
+jobs:
+
+  call-inference:
+    uses: ./.github/workflows/workflow_inference.yml
+
+  call-finetune:
+    uses: ./.github/workflows/workflow_finetune.yml
diff --git a/.github/workflows/workflow_orders.yml b/.github/workflows/workflow_orders_on_pr.yml
similarity index 94%
rename from .github/workflows/workflow_orders.yml
rename to .github/workflows/workflow_orders_on_pr.yml
index e13bccecf..2c8f93f3d 100644
--- a/.github/workflows/workflow_orders.yml
+++ b/.github/workflows/workflow_orders_on_pr.yml
@@ -13,6 +13,7 @@ on:
       - 'inference/**'
       - 'rlhf/**'
       - 'tools/**'
+      - 'pyproject.toml'

 jobs:
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 000000000..58dba18db
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,131 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+  community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+CommunityCodeOfConduct AT intel DOT com.
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..f682f4e4c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,57 @@
+# Contributing
+
+### License
+
+ is licensed under the terms in [LICENSE]. By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms.
+
+### Sign your work
+
+Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify
+the below (from [developercertificate.org](http://developercertificate.org/)):
+
+```
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
+660 York Street, Suite 102,
+San Francisco, CA 94110 USA
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.
+```
+
+Then you just add a line to every git commit message:
+
+    Signed-off-by: Joe Smith <joe.smith@email.com>
+
+Use your real name (sorry, no pseudonyms or anonymous contributions.)
+
+If you set your `user.name` and `user.email` git configs, you can sign your
+commit automatically with `git commit -s`.
diff --git a/README.md b/README.md
index 7276aef1d..965dc56be 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ LLM-on-Ray's modular workflow structure is designed to comprehensively cater to

 * **Interactive Web UI for Enhanced Usability**: Except for command line, LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.

-![image](https://github.com/intel-sandbox/llm-ray/assets/9278199/addd7a7f-83ef-43ae-b3ac-dd81cc2570e4)
+![llm-on-ray](https://github.com/intel/llm-on-ray/assets/9278199/68017c14-c0be-4b91-8d71-4b74ab89bd81)

 ## Getting Started
diff --git a/common/trainer/default_trainer.py b/common/trainer/default_trainer.py
index d013f28e2..f3aa965b9 100644
--- a/common/trainer/default_trainer.py
+++ b/common/trainer/default_trainer.py
@@ -57,8 +57,10 @@ def recovery(self, config):
            self.starting_epoch = checkpoint_epoch["epoch"] + 1
            logger.info(f"recovery to epoch {self.starting_epoch}")
+        except FileNotFoundError as e:
+            logger.info(e)
         except Exception as e:
-            logger.warning(f"recovery error", exc_info=True)
+            logger.warning("recovery error", exc_info=True)

     def _coordinate(self, accelerator):
         self.accelerator = accelerator
@@ -174,7 +176,7 @@ def train(self):
                except OverflowError:
                    eval_loss = float("inf")
                    perplexity = float("inf")
-                logger.info(f"eval epoch:[{idx}/{num_train_epochs}]\tloss:[{eval_loss}]\tppl:[{perplexity}]\ttime:[{time.time()-start}]")
+                logger.info(f"eval epoch:[{idx}/{num_train_epochs}]\tloss:[{eval_loss:.6f}]\tppl:[{perplexity:.6f}]\ttime:[{time.time()-start:.6f}]")

         if checkpoint is not None:
             self.save(checkpoint, idx)
diff --git a/dev/docker/Dockerfile.bigdl-cpu b/dev/docker/Dockerfile.bigdl-cpu
index 449a456b4..403848876 100644
--- a/dev/docker/Dockerfile.bigdl-cpu
+++ b/dev/docker/Dockerfile.bigdl-cpu
@@ -1,10 +1,11 @@
+# syntax=docker/dockerfile:1
 FROM ubuntu:22.04

 ENV LANG C.UTF-8

-WORKDIR /root/llm-ray
+WORKDIR /root/llm-on-ray

-RUN apt-get update -y \
+RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
     && apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
@@ -17,7 +18,7 @@ ENV PATH $CONDA_DIR/bin:$PATH
 # setup env
 SHELL ["/bin/bash", "--login", "-c"]

-RUN conda init bash && \
+RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
     unset -f conda && \
     export PATH=$CONDA_DIR/bin/:${PATH} && \
     conda config --add channels intel && \
@@ -27,7 +28,7 @@ COPY ./pyproject.toml .
 RUN mkdir ./finetune && mkdir ./inference

-RUN pip install -e .[bigdl-cpu] -f https://developer.intel.com/ipex-whl-stable-cpu \
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[bigdl-cpu] -f https://developer.intel.com/ipex-whl-stable-cpu \
     -f https://download.pytorch.org/whl/torch_stable.html

 # Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
diff --git a/dev/docker/Dockerfile.cpu_and_deepspeed b/dev/docker/Dockerfile.cpu_and_deepspeed
index 9fb0a581a..c907d775f 100644
--- a/dev/docker/Dockerfile.cpu_and_deepspeed
+++ b/dev/docker/Dockerfile.cpu_and_deepspeed
@@ -1,10 +1,11 @@
+# syntax=docker/dockerfile:1
 FROM ubuntu:22.04

 ENV LANG C.UTF-8

 WORKDIR /root/llm-on-ray

-RUN apt-get update -y \
+RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
     && apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
@@ -17,7 +18,7 @@ ENV PATH $CONDA_DIR/bin:$PATH
 # setup env
 SHELL ["/bin/bash", "--login", "-c"]

-RUN conda init bash && \
+RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
     unset -f conda && \
     export PATH=$CONDA_DIR/bin/:${PATH} && \
     conda config --add channels intel && \
@@ -27,7 +28,7 @@ COPY ./pyproject.toml .

 RUN mkdir ./finetune && mkdir ./inference

-RUN pip install -e .[cpu,deepspeed] -f https://developer.intel.com/ipex-whl-stable-cpu \
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu,deepspeed] -f https://developer.intel.com/ipex-whl-stable-cpu \
     -f https://download.pytorch.org/whl/torch_stable.html

 RUN ds_report
diff --git a/docs/web_ui.md b/docs/web_ui.md
index 3b0578643..da92b7f66 100644
--- a/docs/web_ui.md
+++ b/docs/web_ui.md
@@ -3,7 +3,7 @@
 LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.

 ## Setup
-Please follow [docs/setup.md](docs/setup.md) to setup the environment first.
+Please follow [setup.md](setup.md) to setup the environment first.

 ## Start Web UI

@@ -17,17 +17,19 @@ python -u inference/start_ui.py --node_user_name $user --conda_env_name $conda_e

 ## Finetune LLMs
 On the `Finetune` tab, you can configure the base model, finetuning parameters, the dataset path and the new model name. Click `Start To Finetune` to start finetuning.
-![image](https://github.com/carsonwang/llm-ray/assets/9278199/38cb6f1f-b5de-495e-a4db-741eb1e15980)
+![webui1](https://github.com/intel/llm-on-ray/assets/9278199/895be765-13d3-455e-a00d-c9ba67ac6781)
+

 ## Deploy and Serve LLM
 On the `Deployment` tab, you can choose a model to deploy, configure parameter `Model Replica Number`, `Cpus per Worker` and `Gpus per Worker`. Click `Deploy` and you will get a model endpoint.
-![image](https://github.com/carsonwang/llm-ray/assets/9278199/937613ad-951c-4543-9e2d-e5b8e7f38d1b)
+![webui2](https://github.com/intel/llm-on-ray/assets/9278199/2a1fb8f2-a2a8-4868-9d1c-418c5c2a6180)
+

 ## Chatbot
 On the `Inferenc` tab, you can now test the model by asking questions.
-![image](https://github.com/carsonwang/llm-ray/assets/9278199/5aa3dace-238a-4b34-9ce2-b3abbd6de2ba)
+![webui3](https://github.com/intel/llm-on-ray/assets/9278199/f7b9dc79-92fe-4e75-85fa-2cf7f36bb58d)
diff --git a/finetune/finetune.py b/finetune/finetune.py
index 088ef89f1..430c452de 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -108,6 +108,7 @@ def train_func(config: Dict[str, Any]):
     trainer = common.trainer.Trainer.registory.get("DefaultTrainer")(config = {
         "num_train_epochs": config["Training"]["epochs"],
         "max_train_step": config["Training"].get("max_train_steps", None),
+        "log_step": 1,
         "output": config["General"]["output_dir"],
         "dataprocesser": {
             "type": "GeneralProcesser",
@@ -200,6 +201,8 @@ def main(external_config = None):

     ray.init(runtime_env = runtime_env)

+    common.logger.info(f"ray available resources = {ray.available_resources()}")
+
     scaling_config = ScalingConfig(
         num_workers = num_training_workers,
         use_gpu = use_gpu,
diff --git a/inference/models/llama-2-7b-chat-hf.yaml b/inference/models/llama-2-7b-chat-hf.yaml
index 168981aaa..b0dc029da 100644
--- a/inference/models/llama-2-7b-chat-hf.yaml
+++ b/inference/models/llama-2-7b-chat-hf.yaml
@@ -7,7 +7,7 @@ deepspeed: false
 workers_per_group: 2
 device: "cpu"
 ipex:
-  enabled: false
+  enabled: true
   precision: bf16
 model_description:
   model_id_or_path: meta-llama/Llama-2-7b-chat-hf
diff --git a/pretrain/requirements.txt b/pretrain/requirements.txt
index fa0d041cd..e25a3e55b 100644
--- a/pretrain/requirements.txt
+++ b/pretrain/requirements.txt
@@ -4,7 +4,7 @@ numpy==1.24.4
 https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl
 torchvision==0.14.1
 torch==1.13.1
-transformers==4.31.0
+transformers==4.36.0
 typing==3.7.4.3
 tabulate
 ray[tune]
diff --git a/pyproject.toml b/pyproject.toml
index 01d5160cc..d5c4396c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "accelerate",
     "datasets>=2.14.6",
     "numpy",
-    "ray @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl",
+    "ray>=2.9",
     "typing>=3.7.4.3",
     "tabulate",
     "ray[tune]",
@@ -36,7 +36,7 @@ dependencies = [
     "deltatuner==1.1.9",
     "py-cpuinfo",
     "pydantic-yaml",
-    "paramiko==3.2.0",
+    "paramiko==3.4.0",
 ]

 [project.optional-dependencies]
@@ -52,8 +52,8 @@ gpu = [
     "torch==2.0.1a0",
     "torchvision==0.15.2a0",
     "intel-extension-for-pytorch==2.0.110+xpu",
-    "oneccl_bind_pt",
-    "dpctl"
+    "oneccl_bind_pt==2.0.100+gpu",
+    "dpctl==0.14.5"
 ]

 deepspeed = [
diff --git a/Security.md b/security.md
similarity index 68%
rename from Security.md
rename to security.md
index d85d4358b..cb59eb893 100644
--- a/Security.md
+++ b/security.md
@@ -2,4 +2,4 @@
 Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation.

 ## Reporting a Vulnerability
-Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).
+Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).