From 678fd73aefd28162d1a6276242df77e65757569f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 8 Nov 2023 17:41:12 -0800 Subject: [PATCH] Document /v1/internal/model/load and fix a bug --- extensions/openai/models.py | 2 +- extensions/openai/script.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/extensions/openai/models.py b/extensions/openai/models.py index da900fb..a737f0c 100644 --- a/extensions/openai/models.py +++ b/extensions/openai/models.py @@ -46,7 +46,7 @@ def _load_model(data): unload_model() model_settings = get_model_metadata(model_name) - update_model_parameters(model_settings, initial=True) + update_model_parameters(model_settings) # Update shared.args with custom model loading settings if args: diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 4f8bb0d..57a7bdb 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -241,6 +241,29 @@ async def handle_model_info(): @app.post("/v1/internal/model/load") async def handle_load_model(request_data: LoadModelRequest): + ''' + This endpoint is experimental and may change in the future. + + The "args" parameter can be used to modify flags like "--load-in-4bit" + or "--n-gpu-layers" before loading a model. Example: + + "args": { + "load_in_4bit": true, + "n_gpu_layers": 12 + } + + Note that those settings will remain after loading the model. So you + may need to change them back to load a second model. + + The "settings" parameter is also a dict but with keys for the + shared.settings object. It can be used to modify the default instruction + template like this: + + "settings": { + "instruction_template": "Alpaca" + } + ''' + try: OAImodels._load_model(to_dict(request_data)) return JSONResponse(content="OK")