[Inference] Fix auth token and add models starcoder and llama2 #39

Merged · 33 commits · Feb 7, 2024

Commits
dc4895f  add starcoder and enable llama2 (Deegue, Jan 8, 2024)
cd1a0ef  nit (Deegue, Jan 8, 2024)
dacbab3  nit (Deegue, Jan 8, 2024)
35e4288  revert (Deegue, Jan 9, 2024)
e73cf55  add token (Deegue, Jan 9, 2024)
f809782  dedup (Deegue, Jan 9, 2024)
5a55e87  add token to from_pretrained (Deegue, Jan 9, 2024)
7c2f004  pass auth token to from_pretrained (Deegue, Jan 10, 2024)
1c48886  nit (Deegue, Jan 10, 2024)
d2651f8  add auth tokens (Deegue, Jan 10, 2024)
9ee82ff  Merge branch 'main' into add_starcoder (Deegue, Jan 15, 2024)
9f552ba  lint (Deegue, Jan 15, 2024)
462164e  Merge branch 'add_starcoder' of https://github.com/Deegue/llm-on-ray … (Deegue, Jan 15, 2024)
562913e  fix lint (Deegue, Jan 15, 2024)
2d3f7c6  Merge branch 'main' into add_starcoder (Deegue, Jan 15, 2024)
836b7f4  nit (Deegue, Jan 16, 2024)
0cb47c7  deepspeed not support starcoder (Deegue, Jan 16, 2024)
23e8b63  nit (Deegue, Jan 16, 2024)
77cedb1  remove from ci (Deegue, Jan 17, 2024)
85cb34d  remove direct auth token (Deegue, Jan 19, 2024)
8ebbcad  add back ci workflow temporarily (Deegue, Jan 19, 2024)
fee7f30  Merge branch 'main' into add_starcoder (Deegue, Jan 19, 2024)
ea9e0cc  Merge branch 'add_starcoder' of https://github.com/Deegue/llm-on-ray … (Deegue, Jan 19, 2024)
a3be1cd  remove from ci (Deegue, Jan 19, 2024)
9b7bff6  add load environment and enable 2 models again (Deegue, Jan 23, 2024)
1ede3bb  add dir (Deegue, Jan 24, 2024)
469acb5  add load environment and enable 2 models again (Deegue, Jan 23, 2024)
2e3b4e2  Merge branch 'add_starcoder' of https://github.com/Deegue/llm-on-ray … (Deegue, Jan 24, 2024)
7356b39  change proxy (Deegue, Jan 24, 2024)
f171099  revert proxy (Deegue, Jan 24, 2024)
5ff00ee  change proxy (Deegue, Jan 29, 2024)
c8a53e2  revert proxy (Deegue, Feb 5, 2024)
b1e78ad  remove 2 models from ci (Deegue, Feb 5, 2024)
29 changes: 26 additions & 3 deletions .github/workflows/workflow_inference.yml
@@ -34,7 +34,7 @@ jobs:
name: inference test
strategy:
matrix:
- model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, llama-2-7b-chat-hf-vllm ]
+ model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b, mistral-7b-v0.1, mpt-7b-bigdl, neural-chat-7b-v3-1, CodeLlama-7b-hf, falcon-7b, starcoder, llama-2-7b-chat-hf, llama-2-7b-chat-hf-vllm ]
isPR:
- ${{inputs.ci_type == 'pr'}}

@@ -61,11 +61,15 @@ jobs:
https_proxy: ${{ inputs.https_proxy }}
volumes:
- /var/run/docker.sock:/var/run/docker.sock
+ - ${{ inputs.runner_config_path }}:/root/actions-runner-config

steps:
- name: Checkout
uses: actions/checkout@v2

+ - name: Load environment variables
+   run: cat /root/actions-runner-config/.env >> $GITHUB_ENV

- name: Determine Target
id: "target"
run: |
@@ -109,6 +113,25 @@ jobs:
- name: Run Inference Test
run: |
TARGET=${{steps.target.outputs.target}}
+ CMD=$(cat << EOF
+ import yaml
+ if ("${{ matrix.model }}" == "starcoder"):
+     conf_path = "inference/models/starcoder.yaml"
+     with open(conf_path, encoding="utf-8") as reader:
+         result = yaml.load(reader, Loader=yaml.FullLoader)
+     result['model_description']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
+     with open(conf_path, 'w') as output:
+         yaml.dump(result, output, sort_keys=False)
+ if ("${{ matrix.model }}" == "llama-2-7b-chat-hf"):
+     conf_path = "inference/models/llama-2-7b-chat-hf.yaml"
+     with open(conf_path, encoding="utf-8") as reader:
+         result = yaml.load(reader, Loader=yaml.FullLoader)
+     result['model_description']["config"]["use_auth_token"] = "${{ env.HF_ACCESS_TOKEN }}"
+     with open(conf_path, 'w') as output:
+         yaml.dump(result, output, sort_keys=False)
+ EOF
+ )
+ docker exec "${TARGET}" python -c "$CMD"
if [[ ${{ matrix.model }} == "mpt-7b-bigdl" ]]; then
docker exec "${TARGET}" bash -c "python inference/serve.py --config_file inference/models/bigdl/mpt-7b-bigdl.yaml --simple"
elif [[ ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
@@ -132,7 +155,7 @@ jobs:
- name: Run Inference Test with DeepSpeed
run: |
TARGET=${{steps.target.outputs.target}}
- if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|mpt-7b.*)$ ]]; then
+ if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
echo ${{ matrix.model }} is not supported!
elif [[ ! ${{ matrix.model }} == "llama-2-7b-chat-hf-vllm" ]]; then
docker exec "${TARGET}" bash -c "python .github/workflows/config/update_inference_config.py --config_file inference/models/\"${{ matrix.model }}\".yaml --output_file \"${{ matrix.model }}\".yaml.deepspeed --deepspeed"
@@ -145,7 +168,7 @@ jobs:
if: ${{ matrix.dtuner_model }}
run: |
TARGET=${{steps.target.outputs.target}}
- if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b.*)$ ]]; then
+ if [[ ${{ matrix.model }} =~ ^(gpt2|falcon-7b|starcoder|mpt-7b.*)$ ]]; then
echo ${{ matrix.model }} is not supported!
else
docker exec "${TARGET}" bash -c "python inference/serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml --simple"
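The net effect of the workflow changes: the runner's config directory is mounted into the container, HF_ACCESS_TOKEN is loaded into the job environment from it, and the starcoder and llama-2-7b-chat-hf YAMLs are patched with that token before serving, since both repos are gated on the Hugging Face Hub. starcoder is also added to the DeepSpeed and deltatuner skip lists (see the commit "deepspeed not support starcoder"). For reference, the embedded python -c heredoc behaves like the standalone sketch below; the command-line handling is illustrative and not part of the PR:

import sys

import yaml

# Configs the CI patches before serving; the paths match the PR.
GATED_MODEL_CONFS = {
    "starcoder": "inference/models/starcoder.yaml",
    "llama-2-7b-chat-hf": "inference/models/llama-2-7b-chat-hf.yaml",
}

def inject_token(model: str, token: str) -> None:
    conf_path = GATED_MODEL_CONFS.get(model)
    if conf_path is None:
        return  # not a gated model, nothing to patch
    with open(conf_path, encoding="utf-8") as reader:
        result = yaml.load(reader, Loader=yaml.FullLoader)
    result["model_description"]["config"]["use_auth_token"] = token
    with open(conf_path, "w") as output:
        yaml.dump(result, output, sort_keys=False)

if __name__ == "__main__":
    # Usage (hypothetical): python inject_token.py starcoder "$HF_ACCESS_TOKEN"
    inject_token(sys.argv[1], sys.argv[2])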
7 changes: 6 additions & 1 deletion inference/deepspeed_predictor.py
@@ -34,6 +34,7 @@ def __init__(self, infer_conf: InferenceConfig, pad_token_id, stopping_criteria)
model_desc.model_id_or_path,
torchscript=True,
trust_remote_code=model_config.trust_remote_code,
+ use_auth_token=infer_conf.model_description.config.use_auth_token,
)

# get correct torch type for loading HF model
@@ -49,7 +50,11 @@ def __init__(self, infer_conf: InferenceConfig, pad_token_id, stopping_criteria)
if model_desc.peft_model_id_or_path:
from peft import PeftModel

- self.model = PeftModel.from_pretrained(self.model, model_desc.peft_model_id_or_path)
+ self.model = PeftModel.from_pretrained(
+     self.model,
+     model_desc.peft_model_id_or_path,
+     use_auth_token=infer_conf.model_description.config.use_auth_token,
+ )
if model_desc.peft_type == "deltatuner":
from deltatuner import DeltaTunerModel

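Both Hub-touching calls in this predictor now carry the token: the initial config load and the optional PeftModel adapter load, so a gated base model or adapter can be fetched inside the Ray workers. A minimal sketch of the pattern, assuming a transformers version that still accepts use_auth_token; the model id and token value are placeholders, not taken from the PR:

from transformers import AutoConfig, AutoModelForCausalLM

use_auth_token = "hf_..."  # placeholder; the real value comes from the model YAML
model_id = "meta-llama/Llama-2-7b-chat-hf"  # placeholder gated repo

# Every from_pretrained call that may download from the Hub gets the token.
config = AutoConfig.from_pretrained(
    model_id,
    torchscript=True,
    use_auth_token=use_auth_token,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    use_auth_token=use_auth_token,
)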
2 changes: 1 addition & 1 deletion inference/models/llama-2-7b-chat-hf.yaml
@@ -7,7 +7,7 @@ deepspeed: false
workers_per_group: 2
device: "cpu"
ipex:
- enabled: true
+ enabled: false
precision: bf16
model_description:
model_id_or_path: meta-llama/Llama-2-7b-chat-hf
22 changes: 22 additions & 0 deletions inference/models/starcoder.yaml
@@ -0,0 +1,22 @@
+ port: 8000
+ name: starcoder
+ route_prefix: /starcoder
+ cpus_per_worker: 24
+ gpus_per_worker: 0
+ deepspeed: false
+ workers_per_group: 2
+ ipex:
+   enabled: false
+   precision: bf16
+ device: "cpu"
+ model_description:
+   model_id_or_path: bigcode/starcoder
+   tokenizer_name_or_path: bigcode/starcoder
+   chat_processor: ChatModelGptJ
+   prompt:
+     intro: ''
+     human_id: ''
+     bot_id: ''
+     stop_words: []
+   config:
+     use_auth_token: ''
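bigcode/starcoder is a gated repo, so use_auth_token is left empty here and filled in by the CI patch step above (or by hand) before serving. With the token in place, the config is served the same way the CI drives the other models, e.g. python inference/serve.py --config_file inference/models/starcoder.yaml --simple.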
3 changes: 2 additions & 1 deletion inference/predictor.py
@@ -10,7 +10,8 @@ class Predictor:
def __init__(self, infer_conf: InferenceConfig) -> None:
self.infer_conf = infer_conf
self.tokenizer = AutoTokenizer.from_pretrained(
-     infer_conf.model_description.tokenizer_name_or_path
+     infer_conf.model_description.tokenizer_name_or_path,
+     **infer_conf.model_description.config.dict(),
)
self.device = torch.device(infer_conf.device)
# for now the deepspeed predictor doesn't have the model
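Rather than passing use_auth_token alone, the tokenizer path splats the whole model_description.config block into from_pretrained as keyword arguments, so trust_remote_code and any future fields ride along too. A sketch of the mechanism with a stand-in pydantic model; the project defines its own InferenceConfig types, and this class is only illustrative:

from pydantic import BaseModel
from transformers import AutoTokenizer

class ModelDescriptionConfig(BaseModel):
    # Stand-in for the project's config block, not its actual class.
    trust_remote_code: bool = False
    use_auth_token: str = ""

config = ModelDescriptionConfig(use_auth_token="hf_...")  # placeholder token
tokenizer = AutoTokenizer.from_pretrained(
    "bigcode/starcoder",
    **config.dict(),  # expands to trust_remote_code=..., use_auth_token=...
)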
7 changes: 6 additions & 1 deletion inference/transformer_predictor.py
@@ -15,6 +15,7 @@ def __init__(self, infer_conf: InferenceConfig):
model_desc.model_id_or_path,
torchscript=True,
trust_remote_code=model_config.trust_remote_code,
+ use_auth_token=infer_conf.model_description.config.use_auth_token,
)

if self.device.type == "hpu":
@@ -52,7 +53,11 @@ def __init__(self, infer_conf: InferenceConfig):
if model_desc.peft_model_id_or_path:
from peft import PeftModel

- model = PeftModel.from_pretrained(model, model_desc.peft_model_id_or_path)
+ model = PeftModel.from_pretrained(
+     model,
+     model_desc.peft_model_id_or_path,
+     use_auth_token=infer_conf.model_description.config.use_auth_token,
+ )
if model_desc.peft_type == "deltatuner":
from deltatuner import DeltaTunerModel

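The transformer_predictor.py change mirrors deepspeed_predictor.py: both the initial from_pretrained call and the optional PeftModel adapter load now receive use_auth_token, so gated models behave the same in either predictor path.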