Fix partial unicode characters issue (#4837)
This commit is contained in:
parent
2c5a1e67f9
commit
1c74b3ab45
3 changed files with 33 additions and 3 deletions
|
@@ -138,11 +138,19 @@ class Exllamav2Model:
|
|||
if has_leading_space:
|
||||
decoded_text = ' ' + decoded_text
|
||||
|
||||
yield decoded_text
|
||||
# Check the partial unicode character
|
||||
if chr(0xfffd) in decoded_text:
|
||||
is_last = i == max_new_tokens - 1
|
||||
is_stopping = token.item() == self.tokenizer.eos_token_id or shared.stop_everything
|
||||
# If we are not at the end of the generation, we skip this token
|
||||
if not (is_last or is_stopping):
|
||||
continue
|
||||
|
||||
if token.item() == self.tokenizer.eos_token_id or shared.stop_everything:
|
||||
break
|
||||
|
||||
yield decoded_text
|
||||
|
||||
def generate(self, prompt, state):
|
||||
output = ''
|
||||
for output in self.generate_with_streaming(prompt, state):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue