From 759da435e3ad27c07642e8922ff281cfd6689203 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Jan 2023 12:01:58 -0300
Subject: [PATCH] Release 8-bit models memory

---
 server.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/server.py b/server.py
index 21009b5..163e1e0 100644
--- a/server.py
+++ b/server.py
@@ -11,6 +11,7 @@ import transformers
 from html_generator import *
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import warnings
+import gc
 
 transformers.logging.set_verbosity_error()
 
@@ -151,6 +152,7 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok
         model = None
         tokenizer = None
         if not args.cpu:
+            gc.collect()
             torch.cuda.empty_cache()
         model, tokenizer = load_model(model_name)
     if inference_settings != loaded_preset: