[demos] Update llm demo with latest ipex-llm

Qi Zheng, 2024-03-26 15:22:59 +08:00 (committed by volcano)
parent 36c009dc97
commit 26db014708
6 changed files with 10 additions and 9 deletions

@@ -114,7 +114,7 @@ This controller manages the distributed workers.
 ```bash
 cd occlum_instance
 occlum start
-HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m bigdl.llm.serving.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
+HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m ipex_llm.serving.fastchat.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
 ```
 Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself to the controller.
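Note: once the worker is up, you can check the registration from the controller side. A minimal sketch, assuming the FastChat controller is running locally on its default port 21001 and exposes the stock `/list_models` endpoint (both are assumptions about the deployment, not part of this commit):

```python
# Query the FastChat controller for the list of registered model workers.
# Assumes the controller listens on its default port 21001 on localhost.
import json
import urllib.request

req = urllib.request.Request("http://localhost:21001/list_models", method="POST")
with urllib.request.urlopen(req) as resp:
    models = json.loads(resp.read())["models"]

# "chatglm2-6b" should appear once the worker has registered itself.
print(models)
```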

@@ -1,6 +1,6 @@
 import argparse
 import torch
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 from benchmark_util import BenchmarkWrapper
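Note: `BenchmarkWrapper` instruments a model so that `generate()` calls report token latencies. A minimal sketch of how the renamed import fits together with it, assuming the chatglm2-6b checkpoint used elsewhere in this demo:

```python
# Load chatglm2-6b with ipex-llm's INT4 optimization, then wrap it so that
# generate() calls report latency figures via BenchmarkWrapper.
from ipex_llm.transformers import AutoModel
from transformers import AutoTokenizer
from benchmark_util import BenchmarkWrapper

model_path = "/models/chatglm2-6b"
model = AutoModel.from_pretrained(model_path, load_in_4bit=True,
                                  trust_remote_code=True)
model = BenchmarkWrapper(model)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
```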

@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 # you could tune the prompt based on your own model,

@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
-from bigdl.llm.transformers import AutoModel
+from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer

@@ -7,9 +7,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 [ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
 $script_dir/miniconda/bin/conda create \
     --prefix $script_dir/python-occlum -y \
-    python=3.9.11
+    python=3.10.0
 # Install BigDL LLM
-$script_dir/python-occlum/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-$script_dir/python-occlum/bin/pip install --pre --upgrade bigdl-llm[all] bigdl-llm[serving]
-$script_dir/python-occlum/bin/pip install transformers_stream_generator einops
+$script_dir/python-occlum/bin/pip install torch==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+$script_dir/python-occlum/bin/pip install --pre --upgrade ipex-llm[all] ipex-llm[serving]
+# $script_dir/python-occlum/bin/pip install intel-extension-for-pytorch
+$script_dir/python-occlum/bin/pip install transformers_stream_generator einops tiktoken
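Note: a quick way to confirm the upgraded packages landed in the conda prefix is to import them with the prefixed interpreter, e.g. `$script_dir/python-occlum/bin/python3`. A minimal sketch:

```python
# Sanity check: import the upgraded stack inside the python-occlum prefix.
import torch
import ipex_llm  # raises ImportError if the ipex-llm install failed

print("torch:", torch.__version__)  # expect 2.2.1+cpu from the CPU wheel index
print("ipex-llm location:", ipex_llm.__file__)
```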

@@ -20,7 +20,7 @@ import argparse
 # load Hugging Face Transformers model with INT4 optimizations
-from bigdl.llm.transformers import AutoModelForCausalLM
+from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
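Note: this import is what enables the INT4 path mentioned in the comment above. A minimal sketch of loading and running a causal LM through it; the checkpoint path, prompt, and generation settings are illustrative, not part of this commit:

```python
import torch
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "path/to/hf-causal-lm"  # illustrative checkpoint path

# load_in_4bit=True quantizes the weights to INT4 while loading
model = AutoModelForCausalLM.from_pretrained(
    model_path, load_in_4bit=True, trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

inputs = tokenizer("What is Occlum?", return_tensors="pt")
with torch.inference_mode():
    output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```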