[demos] Update llm demo with latest ipex-llm

parent 36c009dc97
commit 26db014708

@@ -114,7 +114,7 @@ This controller manages the distributed workers.
 ```bash
 cd occlum_instance
 occlum start
-HF_DATASETS_CACHE=/root/cache  occlum exec /bin/python3 -m bigdl.llm.serving.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
+HF_DATASETS_CACHE=/root/cache  occlum exec /bin/python3 -m ipex_llm.serving.fastchat.model_worker --model-path /models/chatglm2-6b --device cpu --host 0.0.0.0
 ```
 Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself to the controller.
 
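
Once the worker reports "Uvicorn running on ...", you can confirm the registration actually happened. A minimal sketch, assuming the FastChat controller from the earlier step is reachable at its default address (http://localhost:21001) and exposes the standard `/list_models` endpoint:

```python
import json
import urllib.request

# Ask the controller which models its registered workers serve
# (assumed default FastChat controller address; adjust if yours differs).
req = urllib.request.Request("http://localhost:21001/list_models", method="POST")
with urllib.request.urlopen(req) as resp:
    models = json.loads(resp.read())["models"]

print(models)  # the served model, e.g. "chatglm2-6b", should be listed
```
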
@@ -1,6 +1,6 @@
 import argparse
 import torch
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 from benchmark_util import BenchmarkWrapper
 
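
For orientation, here is a hedged sketch of how these renamed imports are typically combined in the benchmark script; the model path and prompt are placeholders, and `load_in_4bit=True` is ipex-llm's flag for INT4 weight conversion at load time:

```python
import torch
from ipex_llm.transformers import AutoModel
from transformers import AutoTokenizer
from benchmark_util import BenchmarkWrapper  # timing helper shipped with the demo

model_path = "/models/chatglm2-6b"  # placeholder path
model = AutoModel.from_pretrained(model_path, load_in_4bit=True,
                                  trust_remote_code=True)
model = BenchmarkWrapper(model)  # wraps generate() to report token latencies
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

with torch.inference_mode():
    input_ids = tokenizer.encode("What is AI?", return_tensors="pt")
    output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```
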
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel, AutoModelForCausalLM
+from ipex_llm.transformers import AutoModel, AutoModelForCausalLM
 from transformers import AutoTokenizer
 
 # you could tune the prompt based on your own model,
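
The context line above ends mid-comment; in these demos it usually continues into a model-specific prompt template. A short sketch of the pattern, using the common ChatGLM2 format (an assumption here; adjust it for your own model):

```python
# Wrap the raw question in the template the model was tuned on
# before tokenizing; this template is the usual ChatGLM2 one.
CHATGLM_V2_PROMPT_FORMAT = "问：{prompt}\n\n答："

prompt = CHATGLM_V2_PROMPT_FORMAT.format(prompt="What is AI?")
print(prompt)
```
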
@@ -19,7 +19,7 @@ import time
 import argparse
 import numpy as np
 
-from bigdl.llm.transformers import AutoModel
+from ipex_llm.transformers import AutoModel
 from transformers import AutoTokenizer
 
 
@@ -7,9 +7,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}"  )" >/dev/null 2>&1 && pwd )"
 [ -d miniconda ] || bash ./Miniconda3-latest-Linux-x86_64.sh -b -p $script_dir/miniconda
 $script_dir/miniconda/bin/conda create \
     --prefix $script_dir/python-occlum -y \
-    python=3.9.11
+    python=3.10.0
 
 # Install BigDL LLM
-$script_dir/python-occlum/bin/pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
-$script_dir/python-occlum/bin/pip install --pre --upgrade bigdl-llm[all] bigdl-llm[serving]
-$script_dir/python-occlum/bin/pip install transformers_stream_generator einops
+$script_dir/python-occlum/bin/pip install torch==2.2.1 --index-url https://download.pytorch.org/whl/cpu
+$script_dir/python-occlum/bin/pip install --pre --upgrade ipex-llm[all] ipex-llm[serving]
+# $script_dir/python-occlum/bin/pip install intel-extension-for-pytorch
+$script_dir/python-occlum/bin/pip install transformers_stream_generator einops tiktoken
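
After the script finishes, a quick sanity check (run with the environment's own interpreter, `$script_dir/python-occlum/bin/python3`) confirms that the renamed package and the pinned torch build resolve; a minimal sketch:

```python
from importlib.metadata import version

import torch
import ipex_llm  # the import succeeding is the main test

print(torch.__version__)    # expect a 2.2.1 CPU build
print(version("ipex-llm"))  # the pre-release version pip resolved
```
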
@@ -20,7 +20,7 @@ import argparse
 
 
 # load Hugging Face Transformers model with INT4 optimizations
-from bigdl.llm.transformers import AutoModelForCausalLM
+from ipex_llm.transformers import AutoModelForCausalLM
 from transformers import AutoTokenizer
 
 
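
The comment in this hunk refers to ipex-llm's INT4 path. A hedged sketch of that loading pattern; the model path is a placeholder, and `save_low_bit()`/`load_low_bit()` are the library's helpers for caching converted weights so later runs skip the quantization step:

```python
from ipex_llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model_path = "/models/your-model"  # placeholder

# load_in_4bit=True converts the weights to INT4 while loading
model = AutoModelForCausalLM.from_pretrained(model_path,
                                             load_in_4bit=True,
                                             trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Optionally persist the quantized weights; a later run can then call
# AutoModelForCausalLM.load_low_bit("./model-int4") instead of re-converting.
model.save_low_bit("./model-int4")
```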