Always return only the new tokens in generation functions

This commit is contained in:
oobabooga 2023-05-11 17:07:20 -03:00
parent c4f0e6d740
commit 0d36c18f5d
4 changed files with 16 additions and 25 deletions

View file

@@ -43,7 +43,7 @@ class Handler(BaseHTTPRequestHandler):
response = json.dumps({
'results': [{
-'text': answer[len(prompt):]
+'text': answer
}]
})
self.wfile.write(response.encode('utf-8'))

View file

@@ -29,7 +29,7 @@ async def _handle_connection(websocket, path):
prompt, generate_params, stopping_strings=stopping_strings, is_chat=False)
# As we stream, only send the new bytes.
-skip_index = len(prompt)
+skip_index = 0
message_num = 0
for a in generator: