Always return only the new tokens in generation functions

2023-05-11 17:07:20 -03:00 · 2023-05-11 17:07:20 -03:00 · 0d36c18f5d
commit 0d36c18f5d
parent c4f0e6d740
4 changed files with 16 additions and 25 deletions
--- a/extensions/api/blocking_api.py
+++ b/extensions/api/blocking_api.py
@@ -43,7 +43,7 @@ class Handler(BaseHTTPRequestHandler):

            response = json.dumps({
                'results': [{
-                    'text': answer[len(prompt):]
+                    'text': answer
                }]
            })
            self.wfile.write(response.encode('utf-8'))
--- a/extensions/api/streaming_api.py
+++ b/extensions/api/streaming_api.py
@@ -29,7 +29,7 @@ async def _handle_connection(websocket, path):
            prompt, generate_params, stopping_strings=stopping_strings, is_chat=False)

        # As we stream, only send the new bytes.
-        skip_index = len(prompt)
+        skip_index = 0
        message_num = 0

        for a in generator: