diff --git a/demos/bigdl-llm/README.md b/demos/bigdl-llm/README.md
index b176f6d5..a190be9c 100644
--- a/demos/bigdl-llm/README.md
+++ b/demos/bigdl-llm/README.md
@@ -114,7 +114,7 @@ This controller manages the distributed workers.
 ```bash
 cd occlum_instance
 occlum start
-HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m bigdl.llm.serving.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
+HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m ipex_llm.serving.fastchat.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
 ```
 
 Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself to the controller.
diff --git a/demos/bigdl-llm/benchmarks/bench.py b/demos/bigdl-llm/benchmarks/bench.py
index 27bcda31..d1b975fb 100644
--- a/demos/bigdl-llm/benchmarks/bench.py
+++ b/demos/bigdl-llm/benchmarks/bench.py
@@ -1,6 +1,6 @@
 import argparse
 import torch
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 
 from benchmark_util import BenchmarkWrapper
diff --git a/demos/bigdl-llm/chatglm2/generate.py b/demos/bigdl-llm/chatglm2/generate.py
index 3bae5c1a..9cc56c4b 100644
--- a/demos/bigdl-llm/chatglm2/generate.py
+++ b/demos/bigdl-llm/chatglm2/generate.py
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 
 # you could tune the prompt based on your own model,
diff --git a/demos/bigdl-llm/chatglm2/streamchat.py b/demos/bigdl-llm/chatglm2/streamchat.py
index 3bbf5333..5cab690d 100644
--- a/demos/bigdl-llm/chatglm2/streamchat.py
+++ b/demos/bigdl-llm/chatglm2/streamchat.py
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel
+from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer
 
 
diff --git a/demos/bigdl-llm/install_python_with_conda.sh b/demos/bigdl-llm/install_python_with_conda.sh
index 201658dc..9a86ab30 100755
--- a/demos/bigdl-llm/install_python_with_conda.sh
+++ b/demos/bigdl-llm/install_python_with_conda.sh
@@ -7,9 +7,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 [ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
 $script_dir/miniconda/bin/conda create \
         --prefix $script_dir/python-occlum -y \
-        python=3.9.11
+        python=3.10.0
 
 # Install BigDL LLM
-$script_dir/python-occlum/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-$script_dir/python-occlum/bin/pip install --pre --upgrade bigdl-llm[all] bigdl-llm[serving]
-$script_dir/python-occlum/bin/pip install transformers_stream_generator einops
+$script_dir/python-occlum/bin/pip install torch==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+$script_dir/python-occlum/bin/pip install --pre --upgrade ipex-llm[all] ipex-llm[serving]
+# $script_dir/python-occlum/bin/pip install intel-extension-for-pytorch
+$script_dir/python-occlum/bin/pip install transformers_stream_generator einops tiktoken
diff --git a/demos/bigdl-llm/model_convert.py b/demos/bigdl-llm/model_convert.py
index 1ef2c580..3d21eec9 100644
--- a/demos/bigdl-llm/model_convert.py
+++ b/demos/bigdl-llm/model_convert.py
@@ -20,7 +20,7 @@ import argparse
 
 # load Hugging Face Transformers model with INT4 optimizations
-from bigdl.llm.transformers import AutoModelForCausalLM
+from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
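
Note: after this rename, the demo scripts load models through `ipex_llm.transformers` instead of `bigdl.llm.transformers`. Below is a minimal sketch of the updated loading pattern the patched files rely on; the model path and prompt are illustrative placeholders, not part of the patch.

```python
# Sketch of the ipex-llm loading pattern used by the patched demos.
# Assumes ipex-llm[all] is installed; the model path and prompt are placeholders.
import torch
from ipex_llm.transformers import AutoModel
from transformers import AutoTokenizer

model_path = "/models/chatglm2-6b"  # same path the demo README uses

# load_in_4bit=True applies ipex-llm's INT4 optimization for CPU inference
model = AutoModel.from_pretrained(model_path, load_in_4bit=True, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

with torch.inference_mode():
    inputs = tokenizer("What is AI?", return_tensors="pt")
    output = model.generate(inputs.input_ids, max_new_tokens=32)
    print(tokenizer.decode(output[0], skip_special_tokens=True))
```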