[demos] Update llm demo with latest ipex-llm
parent 36c009dc97
commit 26db014708
@@ -114,7 +114,7 @@ This controller manages the distributed workers.
 ```bash
 cd occlum_instance
 occlum start
-HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m bigdl.llm.serving.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
+HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m ipex_llm.serving.fastchat.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
 ```
 Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself to the controller.
 
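Once the worker has registered, the controller's model list should include it. A minimal sketch of that check, assuming the FastChat controller is reachable on its default port 21001:

```python
# Ask the running FastChat controller which workers have registered.
# Assumes the controller listens on its default port 21001.
import json
import urllib.request

req = urllib.request.Request("http://localhost:21001/list_models",
                             method="POST")
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # expect "chatglm2-6b" in the returned list
```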
@@ -1,6 +1,6 @@
 import argparse
 import torch
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 from benchmark_util import BenchmarkWrapper
 
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 
 # you could tune the prompt based on your own model,
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel
+from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer
 
 
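The three Python demos above need only this one-line import change; the bigdl.llm namespace maps directly onto ipex_llm and the rest of each script is untouched. During a transition period a demo could hedge across both package names, as in this sketch (not part of the commit):

```python
# Compatibility sketch: prefer the new ipex_llm package and fall back
# to bigdl-llm on older environments that still have it installed.
try:
    from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
except ImportError:
    from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
```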
@@ -7,9 +7,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 [ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
 $script_dir/miniconda/bin/conda create \
     --prefix $script_dir/python-occlum -y \
-    python=3.9.11
+    python=3.10.0
 
 # Install BigDL LLM
-$script_dir/python-occlum/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-$script_dir/python-occlum/bin/pip install --pre --upgrade bigdl-llm[all] bigdl-llm[serving]
-$script_dir/python-occlum/bin/pip install transformers_stream_generator einops
+$script_dir/python-occlum/bin/pip install torch==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+$script_dir/python-occlum/bin/pip install --pre --upgrade ipex-llm[all] ipex-llm[serving]
+# $script_dir/python-occlum/bin/pip install intel-extension-for-pytorch
+$script_dir/python-occlum/bin/pip install transformers_stream_generator einops tiktoken
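A quick post-install sanity check, assuming the conda prefix used above; it confirms the renamed package imports and that the pinned torch wheel is the CPU build:

```python
# Run with $script_dir/python-occlum/bin/python3 after the install
# script completes.
import torch
import ipex_llm  # raises ImportError if only the old bigdl-llm is present

print(torch.__version__)          # expect 2.2.1+cpu
print(torch.cuda.is_available())  # expect False for the CPU-only wheel
```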
@@ -20,7 +20,7 @@ import argparse
 
 
 # load Hugging Face Transformers model with INT4 optimizations
-from bigdl.llm.transformers import AutoModelForCausalLM
+from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
 
 
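For context on the pattern the last hunk updates, a minimal end-to-end sketch of INT4 loading and generation with ipex-llm; the model path reuses the one from the demo as a placeholder, and the prompt and generation settings are illustrative, not taken from the commit:

```python
import torch
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "/models/chatglm2-6b"  # placeholder path from the demo

# load_in_4bit=True applies ipex-llm's INT4 optimization at load time
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path,
                                          trust_remote_code=True)

prompt = "What is Intel SGX?"  # illustrative prompt
input_ids = tokenizer.encode(prompt, return_tensors="pt")
with torch.inference_mode():
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```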