diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index 9f6122d..10a852d 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -52,9 +52,9 @@ class LlamaCppModel:
             'n_gpu_layers': shared.args.n_gpu_layers
         }
 
-        self.model = Llama(**params)
+        result.model = Llama(**params)
         if cache_capacity > 0:
-            self.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
+            result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
 
         # This is ugly, but the model and the tokenizer are the same object in this library.
         return result, result