diff --git a/.github/workflows/workflow_finetune.yml b/.github/workflows/workflow_finetune.yml
index 53c1ac5a1..4749750e4 100644
--- a/.github/workflows/workflow_finetune.yml
+++ b/.github/workflows/workflow_finetune.yml
@@ -19,7 +19,7 @@ jobs:
         model: [ EleutherAI/gpt-j-6b, meta-llama/Llama-2-7b-chat-hf, gpt2, bigscience/bloom-560m, facebook/opt-125m, mosaicml/mpt-7b-chat, huggyllama/llama-7b ]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
-
+
         exclude:
           - { isPR: true }
         include:
@@ -27,22 +27,36 @@ jobs:
           - { model: "meta-llama/Llama-2-7b-chat-hf"}

     runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+        - ${{ vars.ACTIONS_RUNNER_CONFIG_PATH }}:/root/actions-runner-config
+
     steps:
       - name: Checkout
         uses: actions/checkout@v2

       - name: Load environment variables
-        run: cat ~/llm-ray-actions-runner/.env >> $GITHUB_ENV
+        run: cat /root/actions-runner-config/.env >> $GITHUB_ENV

       - name: Build Docker Image
-        run: docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes | docker image prune
+        run: |
+          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile.cpu_and_deepspeed -t finetune:latest && yes | docker container prune && yes
+          docker image prune -f

       - name: Start Docker Container
         run: |
          cid=$(docker ps -q --filter "name=finetune")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="finetune" --hostname="finetune-container" finetune:latest
-
+          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="finetune" --hostname="finetune-container" finetune:latest
       - name: Run Finetune Test
         run: |
          docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
@@ -129,7 +143,6 @@ jobs:
            docker exec "finetune" python -c "$CMD"
            docker exec "finetune" bash -c "python finetune/finetune.py --config_file finetune/finetune.yaml"
          fi
-
       - name: Stop Ray
         run: |
          cid=$(docker ps -q --filter "name=finetune")
@@ -142,6 +155,5 @@ jobs:
         run: |
          cid=$(docker ps -q --filter "name=finetune")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-
       - name: Test Summary
         run: echo "to be continued"
diff --git a/.github/workflows/workflow_finetune_gpu.yml b/.github/workflows/workflow_finetune_gpu.yml
index e3adb7923..f18e4eaf5 100644
--- a/.github/workflows/workflow_finetune_gpu.yml
+++ b/.github/workflows/workflow_finetune_gpu.yml
@@ -10,16 +10,27 @@ jobs:
       matrix:
         model: [ pythia-6.9b, gpt-j-6b ]
     runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+
     steps:
       - name: Checkout
         uses: actions/checkout@v2

       - name: Running task on Intel GPU
         run: |
-          rm ~/borealis-runner/llm-ray.tar.gz -f
-          tar zcf ~/borealis-runner/llm-ray.tar.gz -C ~/actions-runner/_work/llm-ray .
+          rm ~/borealis-runner/llm-on-ray.tar.gz -f
+          tar zcf ~/borealis-runner/llm-on-ray.tar.gz -C ~/actions-runner/_work/llm-on-ray .
          cd ~/borealis-runner/
          python3 finetune_on_pvc.py --base_model "${{ matrix.model }}"
-
       - name: Test Summary
-        run: echo "to be continued"
+        run: echo "to be continued"
\ No newline at end of file
diff --git a/.github/workflows/workflow_inference.yml b/.github/workflows/workflow_inference.yml
index 154ad3158..555b650ca 100644
--- a/.github/workflows/workflow_inference.yml
+++ b/.github/workflows/workflow_inference.yml
@@ -16,7 +16,6 @@ jobs:
     name: inference test
     strategy:
       matrix:
-        # for mistral-7b-v0.1, we use bigdl-cpu to verify
         model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1 ]
         isPR:
           - ${{inputs.ci_type == 'pr'}}
@@ -32,10 +31,22 @@ jobs:
            model: mpt-7b

     runs-on: self-hosted
+
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: ${{ vars.ACTIONS_RUNNER_CONTAINER_IMAGE }}
+      env:
+        http_proxy: ${{ vars.HTTP_PROXY_CONTAINER }}
+        https_proxy: ${{ vars.HTTPS_PROXY_CONTAINER }}
+      volumes:
+        - /var/run/docker.sock:/var/run/docker.sock
+
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
+
       - name: Set Name Prefix
         id: "prefix"
         run: |
@@ -54,14 +65,15 @@ jobs:
            DF_SUFFIX=".cpu_and_deepspeed"
          fi
          PREFIX=${{steps.prefix.outputs.prefix}}
-          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_IMAGE_BUILD }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_IMAGE_BUILD }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes | docker image prune
+          docker build ./ --build-arg CACHEBUST=1 --build-arg http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} --build-arg https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} -f dev/docker/Dockerfile${DF_SUFFIX} -t ${PREFIX}:latest && yes | docker container prune && yes
+          docker image prune -f

       - name: Start Docker Container
         run: |
          PREFIX=${{steps.prefix.outputs.prefix}}
          cid=$(docker ps -q --filter "name=${PREFIX}")
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/llm-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER_RUN }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER_RUN }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest
+          docker run -tid -v ${{ vars.MODEL_CACHE_PATH }}:/root/.cache/huggingface/hub -v ${{ vars.CODE_CHECKOUT_PATH }}:/root/llm-on-ray -e http_proxy=${{ vars.HTTP_PROXY_CONTAINER }} -e https_proxy=${{ vars.HTTPS_PROXY_CONTAINER }} --name="${PREFIX}" --hostname="${PREFIX}-container" ${PREFIX}:latest

       - name: Start Ray Cluster
         run: |
@@ -137,4 +149,7 @@ jobs:
          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi

       - name: Test Summary
-        run: echo "to be continued"
\ No newline at end of file
+        run: echo "to be continued"
+
+
+
diff --git a/.github/workflows/workflow_orders_nightly.yml b/.github/workflows/workflow_orders_nightly.yml
index ae7cf89f5..f80e6f825 100644
--- a/.github/workflows/workflow_orders_nightly.yml
+++ b/.github/workflows/workflow_orders_nightly.yml
@@ -1,8 +1,8 @@
-name: llm-ray inference & finetune
+name: llm-ray inference & finetune nightly

 on:
   schedule:
-    - cron: "0 19 * * *"
+    - cron: "0 16 * * *"

 jobs:

diff --git a/.github/workflows/workflow_orders_on_merge.yml b/.github/workflows/workflow_orders_on_merge.yml
new file mode 100644
index 000000000..56bda5006
--- /dev/null
+++ b/.github/workflows/workflow_orders_on_merge.yml
@@ -0,0 +1,24 @@
+name: llm-ray inference & finetune
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/**'
+      - 'docker/**'
+      - 'common/**'
+      - 'dev/docker/**'
+      - 'finetune/**'
+      - 'inference/**'
+      - 'rlhf/**'
+      - 'tools/**'
+      - 'pyproject.toml'
+
+jobs:
+
+  call-inference:
+    uses: ./.github/workflows/workflow_inference.yml
+
+  call-finetune:
+    uses: ./.github/workflows/workflow_finetune.yml
diff --git a/.github/workflows/workflow_orders.yml b/.github/workflows/workflow_orders_on_pr.yml
similarity index 94%
rename from .github/workflows/workflow_orders.yml
rename to .github/workflows/workflow_orders_on_pr.yml
index e13bccecf..2c8f93f3d 100644
--- a/.github/workflows/workflow_orders.yml
+++ b/.github/workflows/workflow_orders_on_pr.yml
@@ -13,6 +13,7 @@ on:
       - 'inference/**'
       - 'rlhf/**'
       - 'tools/**'
+      - 'pyproject.toml'

 jobs:
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 000000000..58dba18db
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,131 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, caste, color, religion, or sexual
+identity and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+* Demonstrating empathy and kindness toward other people
+* Being respectful of differing opinions, viewpoints, and experiences
+* Giving and gracefully accepting constructive feedback
+* Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+* Focusing on what is best not just for us as individuals, but for the overall
+  community
+
+Examples of unacceptable behavior include:
+
+* The use of sexualized language or imagery, and sexual attention or advances of
+  any kind
+* Trolling, insulting or derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for moderation
+decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official e-mail address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+CommunityCodeOfConduct AT intel DOT com.
+All complaints will be reviewed and investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of
+actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the
+community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+[https://www.contributor-covenant.org/translations][translations].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+[Mozilla CoC]: https://github.com/mozilla/diversity
+[FAQ]: https://www.contributor-covenant.org/faq
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..f682f4e4c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,57 @@
+# Contributing
+
+### License
+
+ is licensed under the terms in [LICENSE]. By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms.
+
+### Sign your work
+
+Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify
+the below (from [developercertificate.org](http://developercertificate.org/)):
+
+```
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
+660 York Street, Suite 102,
+San Francisco, CA 94110 USA
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.
+```
+
+Then you just add a line to every git commit message:
+
+    Signed-off-by: Joe Smith <joe.smith@email.com>
+
+Use your real name (sorry, no pseudonyms or anonymous contributions.)
+
+If you set your `user.name` and `user.email` git configs, you can sign your
+commit automatically with `git commit -s`.
diff --git a/README.md b/README.md
index 7276aef1d..965dc56be 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,7 @@ LLM-on-Ray's modular workflow structure is designed to comprehensively cater to

 * **Interactive Web UI for Enhanced Usability**: Except for command line, LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.

-![image](https://github.com/intel-sandbox/llm-ray/assets/9278199/addd7a7f-83ef-43ae-b3ac-dd81cc2570e4)
+![llm-on-ray](https://github.com/intel/llm-on-ray/assets/9278199/68017c14-c0be-4b91-8d71-4b74ab89bd81)

 ## Getting Started
diff --git a/common/trainer/default_trainer.py b/common/trainer/default_trainer.py
index d013f28e2..f3aa965b9 100644
--- a/common/trainer/default_trainer.py
+++ b/common/trainer/default_trainer.py
@@ -57,8 +57,10 @@ def recovery(self, config):
            self.starting_epoch = checkpoint_epoch["epoch"] + 1
            logger.info(f"recovery to epoch {self.starting_epoch}")
+        except FileNotFoundError as e:
+            logger.info(e)
         except Exception as e:
-            logger.warning(f"recovery error", exc_info=True)
+            logger.warning("recovery error", exc_info=True)

     def _coordinate(self, accelerator):
         self.accelerator = accelerator
@@ -174,7 +176,7 @@ def train(self):
                except OverflowError:
                    eval_loss = float("inf")
                    perplexity = float("inf")
-                logger.info(f"eval epoch:[{idx}/{num_train_epochs}]\tloss:[{eval_loss}]\tppl:[{perplexity}]\ttime:[{time.time()-start}]")
+                logger.info(f"eval epoch:[{idx}/{num_train_epochs}]\tloss:[{eval_loss:.6f}]\tppl:[{perplexity:.6f}]\ttime:[{time.time()-start:.6f}]")

         if checkpoint is not None:
             self.save(checkpoint, idx)
diff --git a/dev/docker/Dockerfile.bigdl-cpu b/dev/docker/Dockerfile.bigdl-cpu
index 449a456b4..403848876 100644
--- a/dev/docker/Dockerfile.bigdl-cpu
+++ b/dev/docker/Dockerfile.bigdl-cpu
@@ -1,10 +1,11 @@
+# syntax=docker/dockerfile:1
 FROM ubuntu:22.04

 ENV LANG C.UTF-8

-WORKDIR /root/llm-ray
+WORKDIR /root/llm-on-ray

-RUN apt-get update -y \
+RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
     && apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
@@ -17,7 +18,7 @@ ENV PATH $CONDA_DIR/bin:$PATH
 # setup env
 SHELL ["/bin/bash", "--login", "-c"]

-RUN conda init bash && \
+RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
     unset -f conda && \
     export PATH=$CONDA_DIR/bin/:${PATH} && \
     conda config --add channels intel && \
@@ -27,7 +28,7 @@ COPY ./pyproject.toml .
 RUN mkdir ./finetune && mkdir ./inference

-RUN pip install -e .[bigdl-cpu] -f https://developer.intel.com/ipex-whl-stable-cpu \
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[bigdl-cpu] -f https://developer.intel.com/ipex-whl-stable-cpu \
     -f https://download.pytorch.org/whl/torch_stable.html

 # Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
diff --git a/dev/docker/Dockerfile.cpu_and_deepspeed b/dev/docker/Dockerfile.cpu_and_deepspeed
index 9fb0a581a..c907d775f 100644
--- a/dev/docker/Dockerfile.cpu_and_deepspeed
+++ b/dev/docker/Dockerfile.cpu_and_deepspeed
@@ -1,10 +1,11 @@
+# syntax=docker/dockerfile:1
 FROM ubuntu:22.04

 ENV LANG C.UTF-8

 WORKDIR /root/llm-on-ray

-RUN apt-get update -y \
+RUN --mount=type=cache,target=/var/cache/apt apt-get update -y \
     && apt-get install -y build-essential cmake wget curl git vim htop ssh net-tools \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
@@ -17,7 +18,7 @@ ENV PATH $CONDA_DIR/bin:$PATH
 # setup env
 SHELL ["/bin/bash", "--login", "-c"]

-RUN conda init bash && \
+RUN --mount=type=cache,target=/opt/conda/pkgs conda init bash && \
     unset -f conda && \
     export PATH=$CONDA_DIR/bin/:${PATH} && \
     conda config --add channels intel && \
@@ -27,7 +28,7 @@ COPY ./pyproject.toml .

 RUN mkdir ./finetune && mkdir ./inference

-RUN pip install -e .[cpu,deepspeed] -f https://developer.intel.com/ipex-whl-stable-cpu \
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e .[cpu,deepspeed] -f https://developer.intel.com/ipex-whl-stable-cpu \
     -f https://download.pytorch.org/whl/torch_stable.html

 RUN ds_report
diff --git a/docs/web_ui.md b/docs/web_ui.md
index 3b0578643..da92b7f66 100644
--- a/docs/web_ui.md
+++ b/docs/web_ui.md
@@ -3,7 +3,7 @@
 LLM-on-Ray introduces a Web UI, allowing users to easily finetune and deploy LLMs through a user-friendly interface. Additionally, the UI includes a chatbot application, enabling users to immediately test and refine the models.

 ## Setup
-Please follow [docs/setup.md](docs/setup.md) to setup the environment first.
+Please follow [setup.md](setup.md) to setup the environment first.

 ## Start Web UI

@@ -17,17 +17,19 @@ python -u inference/start_ui.py --node_user_name $user --conda_env_name $conda_e

 ## Finetune LLMs
 On the `Finetune` tab, you can configure the base model, finetuning parameters, the dataset path and the new model name. Click `Start To Finetune` to start finetuning.
-![image](https://github.com/carsonwang/llm-ray/assets/9278199/38cb6f1f-b5de-495e-a4db-741eb1e15980)
+![webui1](https://github.com/intel/llm-on-ray/assets/9278199/895be765-13d3-455e-a00d-c9ba67ac6781)
+

 ## Deploy and Serve LLM
 On the `Deployment` tab, you can choose a model to deploy, configure parameter `Model Replica Number`, `Cpus per Worker` and `Gpus per Worker`. Click `Deploy` and you will get a model endpoint.
-![image](https://github.com/carsonwang/llm-ray/assets/9278199/937613ad-951c-4543-9e2d-e5b8e7f38d1b)
+![webui2](https://github.com/intel/llm-on-ray/assets/9278199/2a1fb8f2-a2a8-4868-9d1c-418c5c2a6180)
+

 ## Chatbot
 On the `Inferenc` tab, you can now test the model by asking questions.
-![image](https://github.com/carsonwang/llm-ray/assets/9278199/5aa3dace-238a-4b34-9ce2-b3abbd6de2ba)
+![webui3](https://github.com/intel/llm-on-ray/assets/9278199/f7b9dc79-92fe-4e75-85fa-2cf7f36bb58d)
diff --git a/finetune/finetune.py b/finetune/finetune.py
index 088ef89f1..430c452de 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -108,6 +108,7 @@ def train_func(config: Dict[str, Any]):
     trainer = common.trainer.Trainer.registory.get("DefaultTrainer")(config = {
         "num_train_epochs": config["Training"]["epochs"],
         "max_train_step": config["Training"].get("max_train_steps", None),
+        "log_step": 1,
         "output": config["General"]["output_dir"],
         "dataprocesser": {
             "type": "GeneralProcesser",
@@ -200,6 +201,8 @@ def main(external_config = None):

     ray.init(runtime_env = runtime_env)

+    common.logger.info(f"ray available resources = {ray.available_resources()}")
+
     scaling_config = ScalingConfig(
         num_workers = num_training_workers,
         use_gpu = use_gpu,
diff --git a/inference/models/llama-2-7b-chat-hf.yaml b/inference/models/llama-2-7b-chat-hf.yaml
index 168981aaa..b0dc029da 100644
--- a/inference/models/llama-2-7b-chat-hf.yaml
+++ b/inference/models/llama-2-7b-chat-hf.yaml
@@ -7,7 +7,7 @@ deepspeed: false
 workers_per_group: 2
 device: "cpu"
 ipex:
-  enabled: false
+  enabled: true
   precision: bf16
 model_description:
   model_id_or_path: meta-llama/Llama-2-7b-chat-hf
diff --git a/pretrain/requirements.txt b/pretrain/requirements.txt
index fa0d041cd..e25a3e55b 100644
--- a/pretrain/requirements.txt
+++ b/pretrain/requirements.txt
@@ -4,7 +4,7 @@ numpy==1.24.4
 https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl
 torchvision==0.14.1
 torch==1.13.1
-transformers==4.31.0
+transformers==4.36.0
 typing==3.7.4.3
 tabulate
 ray[tune]
diff --git a/pyproject.toml b/pyproject.toml
index 01d5160cc..d5c4396c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "accelerate",
     "datasets>=2.14.6",
     "numpy",
-    "ray @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl",
+    "ray>=2.9",
     "typing>=3.7.4.3",
     "tabulate",
     "ray[tune]",
@@ -36,7 +36,7 @@ dependencies = [
     "deltatuner==1.1.9",
     "py-cpuinfo",
     "pydantic-yaml",
-    "paramiko==3.2.0",
+    "paramiko==3.4.0",
 ]

 [project.optional-dependencies]
@@ -52,8 +52,8 @@ gpu = [
     "torch==2.0.1a0",
     "torchvision==0.15.2a0",
     "intel-extension-for-pytorch==2.0.110+xpu",
-    "oneccl_bind_pt",
-    "dpctl"
+    "oneccl_bind_pt==2.0.100+gpu",
+    "dpctl==0.14.5"
 ]

 deepspeed = [
diff --git a/Security.md b/security.md
similarity index 68%
rename from Security.md
rename to security.md
index d85d4358b..cb59eb893 100644
--- a/Security.md
+++ b/security.md
@@ -2,4 +2,4 @@
 Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation.

 ## Reporting a Vulnerability
-Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).
+Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).