Always return only the new tokens in generation functions

This commit is contained in:
oobabooga 2023-05-11 17:07:20 -03:00
parent c4f0e6d740
commit 0d36c18f5d
4 changed files with 16 additions and 25 deletions

View file

@@ -29,7 +29,7 @@ async def _handle_connection(websocket, path):
prompt, generate_params, stopping_strings=stopping_strings, is_chat=False)
# As we stream, only send the new bytes.
-skip_index = len(prompt)
+skip_index = 0
message_num = 0
for a in generator: