I built the Ray image as per https://github.com/intel/llm-on-ray/blob/main/docs/setup.md, was able to log in to the Docker container, and started the Ray server with:
ray start --head --node-ip-address 127.0.0.1 --dashboard-host='0.0.0.0' --dashboard-port=8265
To serve the mistral-7b-v0.1 model, I ran:
python inference/serve.py --config_file inference/models/mistral-7b-v0.1.yaml --simple
First I got an installation error for intel-extension-for-pytorch; after installing it, I get the error below:
(ServeController pid=9051) await self._user_callable_wrapper.initialize_callable()
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/replica.py", line 778, in initialize_callable
(ServeController pid=9051) await self._call_func_or_gen(
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/replica.py", line 748, in _call_func_or_gen
(ServeController pid=9051) result = callable(*args, **kwargs)
(ServeController pid=9051) File "/root/llm-ray/inference/predictor_deployment.py", line 64, in init
(ServeController pid=9051) self.predictor = TransformerPredictor(infer_conf)
(ServeController pid=9051) File "/root/llm-ray/inference/transformer_predictor.py", line 79, in init
(ServeController pid=9051) import intel_extension_for_pytorch as ipex
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/intel_extension_for_pytorch/init.py", line 94, in
(ServeController pid=9051) from . import cpu
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/intel_extension_for_pytorch/cpu/init.py", line 1, in
(ServeController pid=9051) from . import runtime
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/intel_extension_for_pytorch/cpu/runtime/init.py", line 3, in
(ServeController pid=9051) from .multi_stream import (
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/intel_extension_for_pytorch/cpu/runtime/multi_stream.py", line 4, in
(ServeController pid=9051) import intel_extension_for_pytorch._C as core
(ServeController pid=9051) ImportError: /usr/local/lib/python3.10/dist-packages/intel_extension_for_pytorch/lib/libintel-ext-pt-cpu.so: undefined symbol: _ZNK5torch8autograd4Node4nameEv
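If I understand the error correctly, an undefined C++ symbol at import time is the usual signature of an intel-extension-for-pytorch wheel built against a different torch release than the one installed. Here is a minimal check I would run (a hypothetical snippet; it reads installed wheel metadata, so it works even though importing ipex itself fails):

```python
# Hypothetical version check: IPEX CPU wheels pair with one specific torch
# release (e.g. ipex 2.1.x with torch 2.1.x); a mismatch surfaces at import
# time exactly like the undefined-symbol ImportError above.
from importlib.metadata import version

print("torch                       :", version("torch"))
print("intel-extension-for-pytorch :", version("intel-extension-for-pytorch"))
```

If the two major.minor versions differ, reinstalling a matching pair (e.g. pip install torch==2.1.0 intel-extension-for-pytorch==2.1.0) should presumably fix the import, though I don't know which pair the llm-on-ray image expects.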
I also tried to serve the mpt model with:
python inference/serve.py --config_file inference/models/mpt-7b.yaml --simple
and I get the error below:
(ServeController pid=9051) File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result
(ServeController pid=9051) return self.__get_result()
(ServeController pid=9051) File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
(ServeController pid=9051) raise self._exception
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/replica.py", line 570, in initialize_and_get_metadata
(ServeController pid=9051) raise RuntimeError(traceback.format_exc()) from None
(ServeController pid=9051) RuntimeError: Traceback (most recent call last):
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/replica.py", line 554, in initialize_and_get_metadata
(ServeController pid=9051) await self._user_callable_wrapper.initialize_callable()
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/replica.py", line 778, in initialize_callable
(ServeController pid=9051) await self._call_func_or_gen(
(ServeController pid=9051) File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/replica.py", line 748, in _call_func_or_gen
(ServeController pid=9051) result = callable(*args, **kwargs)
(ServeController pid=9051) File "/root/llm-ray/inference/predictor_deployment.py", line 64, in init
(ServeController pid=9051) self.predictor = TransformerPredictor(infer_conf)
(ServeController pid=9051) File "/root/llm-ray/inference/transformer_predictor.py", line 22, in init
(ServeController pid=9051) from optimum.habana.transformers.modeling_utils import (
(ServeController pid=9051) File "/root/optimum-habana/optimum/habana/transformers/modeling_utils.py", line 19, in
(ServeController pid=9051) from .models import (
(ServeController pid=9051) File "/root/optimum-habana/optimum/habana/transformers/models/init.py", line 59, in
(ServeController pid=9051) from .mpt import (
(ServeController pid=9051) File "/root/optimum-habana/optimum/habana/transformers/models/mpt/init.py", line 1, in
(ServeController pid=9051) from .modeling_mpt import (
(ServeController pid=9051) File "/root/optimum-habana/optimum/habana/transformers/models/mpt/modeling_mpt.py", line 24, in
(ServeController pid=9051) from transformers.models.mpt.modeling_mpt import MptForCausalLM, MptModel, _expand_mask, _make_causal_mask
(ServeController pid=9051) ImportError: cannot import name '_expand_mask' from 'transformers.models.mpt.modeling_mpt' (/usr/local/lib/python3.10/dist-packages/transformers/models/mpt/modeling_mpt.py)
(ServeController pid=9051) WARNING 2024-01-18 09:37:50,769 controller 9051 application_state.py:726 - The deployments ['PredictorDeployment'] are UNHEALTHY.
Traceback (most recent call last):
File "/root/llm-ray/inference/serve.py", line 170, in
main(sys.argv[1:])
File "/root/llm-ray/inference/serve.py", line 160, in main
openai_serve_run(deployments, host, route_prefix, args.port)
File "/root/llm-ray/inference/api_server_openai.py", line 75, in openai_serve_run
serve.run(
File "/usr/local/lib/python3.10/dist-packages/ray/serve/api.py", line 543, in run
client.deploy_application(
File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/client.py", line 50, in check
return f(self, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/ray/serve/_private/client.py", line 321, in deploy_application
self._wait_for_application_running(name)
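The mpt failure looks like a transformers version mismatch rather than anything Ray-specific: if I read the transformers changelog correctly, newer releases (around v4.35) moved the private attention-mask helpers out of the per-model files, so the optimum-habana checkout under /root/optimum-habana can no longer import _expand_mask from modeling_mpt. A quick check (hypothetical snippet):

```python
# Hypothetical check: does the installed transformers still expose the
# private _expand_mask helper that this optimum-habana checkout imports
# from the MPT modeling file?
from importlib.metadata import version
import transformers.models.mpt.modeling_mpt as mpt_mod

print("transformers:", version("transformers"))
print("_expand_mask present:", hasattr(mpt_mod, "_expand_mask"))
```

If that prints False, I would expect pinning transformers to an older release (or updating the optimum-habana checkout) to resolve the import, but I don't know which versions this repo pins.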
Can anyone help me figure out whether I'm missing something, or whether specific library versions are required?