From ebf720585b5d13e5c8f7bcea63adfb86a5aee49d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 22 Jan 2023 20:07:19 -0300
Subject: [PATCH] Mention time and it/s in terminal with streaming off

---
 server.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/server.py b/server.py
index a5600d5..011219c 100644
--- a/server.py
+++ b/server.py
@@ -182,8 +182,11 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok
 
     # Generate the entire reply at once
     if args.no_stream:
+        t0 = time.time()
         output = eval(f"model.generate(input_ids, eos_token_id={n}, {preset}){cuda}")
         reply = decode(output[0])
+        t1 = time.time()
+        print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output[0])-len(input_ids[0]))/(t1-t0):.2f} it/s)")
         yield formatted_outputs(reply, model_name)
 
     # Generate the reply 1 token at a time