Stop generation immediately when using "Maximum tokens/second" (#3952)
--------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
This commit is contained in:
parent
b7c55665c1
commit
893a72a1c5
1 changed file with 1 addition and 1 deletion
|
@@ -96,7 +96,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
|
|||
last_update = cur_time
|
||||
yield reply
|
||||
|
||||
- if stop_found:
|
||||
+ if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
|
||||
break
|
||||
|
||||
if not is_chat:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue