From d2ea925fa5a0b83e607e67681f944d461a23ad24 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 16 Apr 2023 00:53:40 -0300
Subject: [PATCH] Bump llama-cpp-python to use LlamaCache

---
 modules/llamacpp_model_alternative.py | 3 ++-
 requirements.txt                      | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/llamacpp_model_alternative.py b/modules/llamacpp_model_alternative.py
index 8fea2ab..6bdf9bc 100644
--- a/modules/llamacpp_model_alternative.py
+++ b/modules/llamacpp_model_alternative.py
@@ -6,7 +6,7 @@ Documentation:
 https://abetlen.github.io/llama-cpp-python/
 '''
 
-from llama_cpp import Llama
+from llama_cpp import Llama, LlamaCache
 
 from modules import shared
 from modules.callbacks import Iteratorize
@@ -27,6 +27,7 @@ class LlamaCppModel:
             'n_threads': shared.args.threads or None
         }
         self.model = Llama(**params)
+        self.model.set_cache(LlamaCache())
 
         # This is ugly, but the model and the tokenizer are the same object in this library.
         return result, result
diff --git a/requirements.txt b/requirements.txt
index 996afe2..34eee23 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,5 +14,5 @@ tqdm
 git+https://github.com/huggingface/peft
 transformers==4.28.0
 bitsandbytes==0.38.1; platform_system != "Windows"
-llama-cpp-python==0.1.33; platform_system != "Windows"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.33/llama_cpp_python-0.1.33-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+llama-cpp-python==0.1.34; platform_system != "Windows"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.34/llama_cpp_python-0.1.34-cp310-cp310-win_amd64.whl; platform_system == "Windows"
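
Note on the added call: Llama.set_cache takes a LlamaCache instance rather than the class itself, which is why the hunk instantiates LlamaCache(). As a rough sketch of the intended usage outside this repo (not part of the patch; the model path and prompt below are placeholders):

from llama_cpp import Llama, LlamaCache

# "models/ggml-model.bin" is a placeholder; point this at a real GGML model file.
llm = Llama(model_path="models/ggml-model.bin", n_ctx=2048)

# Attach a cache so evaluated prompt state can be reused across calls
# instead of being recomputed from scratch each time.
llm.set_cache(LlamaCache())

out = llm("Q: What is the capital of France? A:", max_tokens=16, stop=["\n"])
print(out["choices"][0]["text"])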