Fix llama.cpp truncation (#3400)
Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
parent 4e6dc6d99d
commit f4005164f4
2 changed files with 7 additions and 1 deletion
@@ -6,6 +6,7 @@ import torch
 from modules import shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
+from modules.text_generation import get_max_prompt_length

 import llama_cpp
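The new import wires the shared truncation budget into the llama.cpp backend. For context, and assuming the webui's usual convention, get_max_prompt_length reserves room for the reply by subtracting max_new_tokens from the configured truncation_length. A one-line sketch of that helper (not part of this diff; verify against modules/text_generation.py before relying on it):

def get_max_prompt_length(state):
    # Token budget left for the prompt after reserving space for generation.
    # Sketch only: assumes the usual 'truncation_length' and 'max_new_tokens'
    # keys in the webui's state dict.
    return state['truncation_length'] - state['max_new_tokens']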
@@ -91,6 +92,12 @@ class LlamaCppModel:
         LogitsProcessorList = llama_cpp_lib().LogitsProcessorList

         prompt = prompt if type(prompt) is str else prompt.decode()
+
+        # Handle truncation
+        prompt = self.encode(prompt)
+        prompt = prompt[-get_max_prompt_length(state):]
+        prompt = self.decode(prompt).decode('utf-8')

         completion_chunks = self.model.create_completion(
             prompt=prompt,
             max_tokens=state['max_new_tokens'],
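The six added lines left-truncate at the token level: encode the prompt to token IDs, keep only the most recent get_max_prompt_length(state) tokens, and decode back to text before calling create_completion. Below is a minimal standalone sketch of the same idea written against llama-cpp-python's public tokenize/detokenize API rather than the webui's wrapper methods; the truncate_prompt helper and the 4096/512 numbers are illustrative assumptions, not part of the commit:

import llama_cpp

def truncate_prompt(model: llama_cpp.Llama, prompt: str, max_prompt_tokens: int) -> str:
    # Encode to token IDs (llama-cpp-python tokenizes bytes, not str).
    tokens = model.tokenize(prompt.encode('utf-8'))
    # Keep the most recent tokens so the end of the conversation survives.
    tokens = tokens[-max_prompt_tokens:]
    # Decode back to text; errors='ignore' guards against a multibyte
    # character split at the truncation boundary (the commit itself uses
    # a plain .decode('utf-8')).
    return model.detokenize(tokens).decode('utf-8', errors='ignore')

# Usage sketch: reserve room in the context window for the reply,
# mirroring what get_max_prompt_length(state) computes.
# model = llama_cpp.Llama(model_path='model.gguf', n_ctx=4096)
# prompt = truncate_prompt(model, long_history, max_prompt_tokens=4096 - 512)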