Fix llama.cpp truncation (#3400)
Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
parent 4e6dc6d99d
commit f4005164f4
2 changed files with 7 additions and 1 deletion
@@ -6,6 +6,7 @@ import torch
 from modules import shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
+from modules.text_generation import get_max_prompt_length

 import llama_cpp
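The new import wires the shared truncation budget into the llama.cpp backend. For context, and assuming the webui's usual convention, get_max_prompt_length reserves room for the reply by subtracting max_new_tokens from the configured truncation_length. A one-line sketch of that helper (not part of this diff; verify against modules/text_generation.py before relying on it):

def get_max_prompt_length(state):
    # Token budget left for the prompt after reserving space for generation.
    # Sketch only: assumes the usual 'truncation_length' and 'max_new_tokens'
    # keys in the webui's state dict.
    return state['truncation_length'] - state['max_new_tokens']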
@@ -91,6 +92,12 @@ class LlamaCppModel:
         LogitsProcessorList = llama_cpp_lib().LogitsProcessorList

         prompt = prompt if type(prompt) is str else prompt.decode()
+
+        # Handle truncation
+        prompt = self.encode(prompt)
+        prompt = prompt[-get_max_prompt_length(state):]
+        prompt = self.decode(prompt).decode('utf-8')

         completion_chunks = self.model.create_completion(
             prompt=prompt,
             max_tokens=state['max_new_tokens'],
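The six added lines left-truncate at the token level: encode the prompt to token IDs, keep only the most recent get_max_prompt_length(state) tokens, and decode back to text before calling create_completion. Below is a minimal standalone sketch of the same idea written against llama-cpp-python's public tokenize/detokenize API rather than the webui's wrapper methods; the truncate_prompt helper and the 4096/512 numbers are illustrative assumptions, not part of the commit:

import llama_cpp

def truncate_prompt(model: llama_cpp.Llama, prompt: str, max_prompt_tokens: int) -> str:
    # Encode to token IDs (llama-cpp-python tokenizes bytes, not str).
    tokens = model.tokenize(prompt.encode('utf-8'))
    # Keep the most recent tokens so the end of the conversation survives.
    tokens = tokens[-max_prompt_tokens:]
    # Decode back to text; errors='ignore' guards against a multibyte
    # character split at the truncation boundary (the commit itself uses
    # a plain .decode('utf-8')).
    return model.detokenize(tokens).decode('utf-8', errors='ignore')

# Usage sketch: reserve room in the context window for the reply,
# mirroring what get_max_prompt_length(state) computes.
# model = llama_cpp.Llama(model_path='model.gguf', n_ctx=4096)
# prompt = truncate_prompt(model, long_history, max_prompt_tokens=4096 - 512)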