[demos] Update llm demo with latest ipex-llm

Qi Zheng 2024-03-26 15:22:59 +08:00 committed by volcano
parent 36c009dc97
commit 26db014708
6 changed files with 10 additions and 9 deletions

@@ -114,7 +114,7 @@ This controller manages the distributed workers.
 ```bash
 cd occlum_instance
 occlum start
-HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m bigdl.llm.serving.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
+HF_DATASETS_CACHE=/root/cache occlum exec /bin/python3 -m ipex_llm.serving.fastchat.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
 ```
 Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself to the controller.
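The worker entry point moves from `bigdl.llm.serving.model_worker` to `ipex_llm.serving.fastchat.model_worker`. A quick way to catch a stale install before the long model load is a hypothetical import smoke test inside the enclave (the module path comes from this diff; the check itself is not part of the demo):

```bash
# Hypothetical smoke test: fail fast if the renamed worker module is missing.
occlum exec /bin/python3 -c "import ipex_llm.serving.fastchat.model_worker"
```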

@@ -1,6 +1,6 @@
 import argparse
 import torch
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 from benchmark_util import BenchmarkWrapper
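In the benchmark script only the import root changes; the wrapper usage is unchanged. A minimal sketch of how the pieces typically fit together, assuming the demo's local `benchmark_util` module and its `do_print` flag (the model path is reused from this demo, not taken from this hunk):

```python
# Minimal sketch: wrap an INT4-loaded model so generate() reports timings.
from ipex_llm.transformers import AutoModel
from transformers import AutoTokenizer
from benchmark_util import BenchmarkWrapper

model = AutoModel.from_pretrained("/models/chatglm2-6b",
                                  load_in_4bit=True,       # ipex-llm low-bit loading
                                  trust_remote_code=True)
model = BenchmarkWrapper(model, do_print=True)  # print per-generation latency
```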

@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 # you could tune the prompt based on your own model,

@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
-from bigdl.llm.transformers import AutoModel
+from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer

@@ -7,9 +7,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 [ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
 $script_dir/miniconda/bin/conda create \
         --prefix $script_dir/python-occlum -y \
-        python=3.9.11
+        python=3.10.0
 # Install BigDL LLM
-$script_dir/python-occlum/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-$script_dir/python-occlum/bin/pip install --pre --upgrade bigdl-llm[all] bigdl-llm[serving]
-$script_dir/python-occlum/bin/pip install transformers_stream_generator einops
+$script_dir/python-occlum/bin/pip install torch==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+$script_dir/python-occlum/bin/pip install --pre --upgrade ipex-llm[all] ipex-llm[serving]
+# $script_dir/python-occlum/bin/pip install intel-extension-for-pytorch
+$script_dir/python-occlum/bin/pip install transformers_stream_generator einops tiktoken
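After the install step it can be worth confirming that the pinned torch build and the renamed ipex-llm package resolve from the conda-created interpreter (a hypothetical sanity check, not part of the script):

```bash
# Hypothetical sanity check before building the Occlum instance.
$script_dir/python-occlum/bin/python3 -c "import torch, ipex_llm; print(torch.__version__)"
```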

@@ -20,7 +20,7 @@ import argparse
 # load Hugging Face Transformers model with INT4 optimizations
-from bigdl.llm.transformers import AutoModelForCausalLM
+from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
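End to end, the renamed import is used exactly as before. A minimal generation sketch under the new package name (the model path and prompt are placeholders, not from this commit; `load_in_4bit=True` is ipex-llm's INT4 loading flag):

```python
# Sketch: INT4 load and a short generation with the renamed package.
import torch
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "/models/your_model"  # placeholder: any causal-LM checkpoint
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

with torch.inference_mode():
    input_ids = tokenizer.encode("What is Occlum?", return_tensors="pt")
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```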