[demos] Update llm demo with latest ipex-llm

Qi Zheng, 2024-03-26 15:22:59 +08:00 (committed by volcano)
parent 36c009dc97
commit 26db014708
6 changed files with 10 additions and 9 deletions

@@ -114,7 +114,7 @@ This controller manages the distributed workers.
 ```bash
 cd occlum_instance
 occlum start
-HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m bigdl.llm.serving.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
+HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m ipex_llm.serving.fastchat.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
 ```
 Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself to the controller.
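Note: once the worker is up, you can check the registration from the controller side. A minimal sketch, assuming the FastChat controller is running locally on its default port 21001 and exposes the stock `/list_models` endpoint (both are assumptions about the deployment, not part of this commit):

```python
# Query the FastChat controller for the list of registered model workers.
# Assumes the controller listens on its default port 21001 on localhost.
import json
import urllib.request

req = urllib.request.Request("http://localhost:21001/list_models", method="POST")
with urllib.request.urlopen(req) as resp:
    models = json.loads(resp.read())["models"]

# "chatglm2-6b" should appear once the worker has registered itself.
print(models)
```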

@@ -1,6 +1,6 @@
 import argparse
 import torch
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 from benchmark_util import BenchmarkWrapper
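Note: `BenchmarkWrapper` instruments a model so that `generate()` calls report token latencies. A minimal sketch of how the renamed import fits together with it, assuming the chatglm2-6b checkpoint used elsewhere in this demo:

```python
# Load chatglm2-6b with ipex-llm's INT4 optimization, then wrap it so that
# generate() calls report latency figures via BenchmarkWrapper.
from ipex_llm.transformers import AutoModel
from transformers import AutoTokenizer
from benchmark_util import BenchmarkWrapper

model_path = "/models/chatglm2-6b"
model = AutoModel.from_pretrained(model_path, load_in_4bit=True,
                                  trust_remote_code=True)
model = BenchmarkWrapper(model)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
```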

@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 # you could tune the prompt based on your own model,

@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
-from bigdl.llm.transformers import AutoModel
+from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer

@@ -7,9 +7,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 [ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
 $script_dir/miniconda/bin/conda create \
     --prefix $script_dir/python-occlum -y \
-    python=3.9.11
+    python=3.10.0
 # Install BigDL LLM
-$script_dir/python-occlum/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-$script_dir/python-occlum/bin/pip install --pre --upgrade bigdl-llm[all] bigdl-llm[serving]
-$script_dir/python-occlum/bin/pip install transformers_stream_generator einops
+$script_dir/python-occlum/bin/pip install torch==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+$script_dir/python-occlum/bin/pip install --pre --upgrade ipex-llm[all] ipex-llm[serving]
+# $script_dir/python-occlum/bin/pip install intel-extension-for-pytorch
+$script_dir/python-occlum/bin/pip install transformers_stream_generator einops tiktoken
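Note: a quick way to confirm the upgraded packages landed in the conda prefix is to import them with the prefixed interpreter, e.g. `$script_dir/python-occlum/bin/python3`. A minimal sketch:

```python
# Sanity check: import the upgraded stack inside the python-occlum prefix.
import torch
import ipex_llm  # raises ImportError if the ipex-llm install failed

print("torch:", torch.__version__)  # expect 2.2.1+cpu from the CPU wheel index
print("ipex-llm location:", ipex_llm.__file__)
```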

@@ -20,7 +20,7 @@ import argparse
 # load Hugging Face Transformers model with INT4 optimizations
-from bigdl.llm.transformers import AutoModelForCausalLM
+from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
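Note: this import is what enables the INT4 path mentioned in the comment above. A minimal sketch of loading and running a causal LM through it; the checkpoint path, prompt, and generation settings are illustrative, not part of this commit:

```python
import torch
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "path/to/hf-causal-lm"  # illustrative checkpoint path

# load_in_4bit=True quantizes the weights to INT4 while loading
model = AutoModelForCausalLM.from_pretrained(
    model_path, load_in_4bit=True, trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

inputs = tokenizer("What is Occlum?", return_tensors="pt")
with torch.inference_mode():
    output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```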