Fix partial unicode characters issue (#4837)

This commit is contained in:
Yiximail 2023-12-08 20:50:53 +08:00 committed by GitHub
parent 2c5a1e67f9
commit 1c74b3ab45
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 3 deletions

View file

@@ -362,7 +362,12 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
if output[-1] in eos_token_ids:
break
cumulative_reply += get_reply_from_output_ids(output, state, starting_from=starting_from)
new_content = get_reply_from_output_ids(output, state, starting_from=starting_from)
# check the partial unicode character
if chr(0xfffd) in new_content:
continue
cumulative_reply += new_content
starting_from = len(output)
yield cumulative_reply