[demos] Update llm demo with latest ipex-llm
parent 36c009dc97
commit 26db014708
@@ -114,7 +114,7 @@ This controller manages the distributed workers.
 
 ```bash
 cd occlum_instance
 occlum start
-HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m bigdl.llm.serving.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
+HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m ipex_llm.serving.fastchat.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
 ```
 Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself to the controller.
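Once the worker is up, you can confirm the registration by querying the FastChat controller's REST API. Below is a minimal sketch, assuming the controller from the earlier step is reachable at its default address `http://localhost:21001` (adjust for your deployment):

```python
# Sketch: ask the FastChat controller which model workers have registered.
# Assumes the controller listens on its default port 21001 on this host.
import requests

controller = "http://localhost:21001"  # assumed controller address

# Re-poll all workers, then list the models they serve.
requests.post(f"{controller}/refresh_all_workers")
models = requests.post(f"{controller}/list_models").json()["models"]
print(models)  # should include 'chatglm2-6b' once the worker has registered
```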
@@ -1,6 +1,6 @@
 import argparse
 import torch
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 from benchmark_util import BenchmarkWrapper
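For context, a minimal sketch of how these renamed imports fit together in the benchmark script (the model path is a placeholder, and `BenchmarkWrapper` is the demo's bundled latency helper):

```python
# Sketch: load a model in INT4 through the renamed ipex_llm import and wrap
# it so generate() reports per-token latency. Paths are placeholders.
import torch
from ipex_llm.transformers import AutoModel
from transformers import AutoTokenizer
from benchmark_util import BenchmarkWrapper

model_path = "/models/chatglm2-6b"  # placeholder model path
model = AutoModel.from_pretrained(model_path, load_in_4bit=True,
                                  trust_remote_code=True)
model = BenchmarkWrapper(model)  # reports token latencies during generate()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

with torch.inference_mode():
    input_ids = tokenizer.encode("What is AI?", return_tensors="pt")
    output = model.generate(input_ids, max_new_tokens=32)
    print(tokenizer.decode(output[0], skip_special_tokens=True))
```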
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 
 # you could tune the prompt based on your own model,
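The prompt-tuning comment above means each model family expects its own template. As an illustration, a self-contained sketch using ChatGLM2-style question/answer markers (treat the exact template as an assumption for any other model):

```python
# Sketch: wrap a raw question in the prompt markers a model was trained on.
# The ChatGLM2-style template is illustrative; tune it for your own model.
CHATGLM_V2_PROMPT_FORMAT = "问：{prompt}\n\n答："

def build_prompt(question: str) -> str:
    # The formatted string is what gets tokenized and passed to generate().
    return CHATGLM_V2_PROMPT_FORMAT.format(prompt=question)

print(build_prompt("What is AI?"))
```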
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel
+from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer
 
@@ -7,9 +7,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 [ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
 $script_dir/miniconda/bin/conda create \
     --prefix $script_dir/python-occlum -y \
-    python=3.9.11
+    python=3.10.0
 
 # Install BigDL LLM
-$script_dir/python-occlum/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-$script_dir/python-occlum/bin/pip install --pre --upgrade bigdl-llm[all] bigdl-llm[serving]
-$script_dir/python-occlum/bin/pip install transformers_stream_generator einops
+$script_dir/python-occlum/bin/pip install torch==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+$script_dir/python-occlum/bin/pip install --pre --upgrade ipex-llm[all] ipex-llm[serving]
+# $script_dir/python-occlum/bin/pip install intel-extension-for-pytorch
+$script_dir/python-occlum/bin/pip install transformers_stream_generator einops tiktoken
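After the script finishes, a quick sanity check is to import the freshly installed packages with the provisioned interpreter (`$script_dir/python-occlum/bin/python3`); a sketch:

```python
# Sketch: verify the pinned versions landed in the new environment.
# Run with $script_dir/python-occlum/bin/python3.
import sys
import torch
import ipex_llm  # the package that replaces bigdl-llm

print(sys.version)        # expect 3.10.0
print(torch.__version__)  # expect 2.2.1+cpu from the CPU wheel index
```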
@@ -20,7 +20,7 @@ import argparse
 
 
 # load Hugging Face Transformers model with INT4 optimizations
-from bigdl.llm.transformers import AutoModelForCausalLM
+from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
 
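A pattern related to the INT4 comment above is quantizing once and reusing the converted weights, so later runs skip the conversion step. A sketch, assuming ipex-llm's low-bit save/load helpers and placeholder paths:

```python
# Sketch: quantize to INT4 once, then reload the converted weights directly.
# save_low_bit/load_low_bit are ipex-llm helpers; paths are placeholders.
from ipex_llm.transformers import AutoModelForCausalLM

# First run: convert the checkpoint to INT4 while loading.
model = AutoModelForCausalLM.from_pretrained(
    "/path/to/model", load_in_4bit=True, trust_remote_code=True
)
model.save_low_bit("/path/to/model-int4")

# Later runs: load the already-quantized weights, skipping conversion.
model = AutoModelForCausalLM.load_low_bit(
    "/path/to/model-int4", trust_remote_code=True
)
```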