Add ExLlama support (#2444)
parent dea43685b0
commit 9f40032d32
12 changed files with 156 additions and 47 deletions
@@ -77,7 +77,10 @@ def load_model_wrapper(selected_model, loader, autoload=False):
         else:
             yield f"Failed to load {selected_model}."
     except:
-        yield traceback.format_exc()
+        exc = traceback.format_exc()
+        logger.error('Failed to load the model.')
+        print(exc)
+        yield exc
 
 
 def load_lora_wrapper(selected_loras):
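For context, `load_model_wrapper` is a generator, so Gradio streams each yielded string to the UI as a status update; the change captures the traceback once so it can be logged server-side and still shown in the browser. Below is a self-contained sketch of that error path, assuming a hypothetical `load_model` stub and a plain `logging` logger in place of the web UI's actual module code:

```python
import logging
import traceback

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)


def load_model(name, loader):
    # Hypothetical stand-in for the web UI's real loader dispatch.
    raise RuntimeError(f"no backend available for loader {loader!r}")


def load_model_wrapper(selected_model, loader, autoload=False):
    # Generator: each yielded string becomes a UI status update.
    try:
        yield f"Loading {selected_model}..."
        model = load_model(selected_model, loader)
        if model is not None:
            yield f"Successfully loaded {selected_model}"
        else:
            yield f"Failed to load {selected_model}."
    except Exception:
        # The change above: keep the traceback in a variable so it is both
        # logged on the server and yielded to the UI, not only yielded.
        exc = traceback.format_exc()
        logger.error('Failed to load the model.')
        print(exc)
        yield exc


for message in load_model_wrapper("llama-7b-4bit", "ExLlama"):
    print(message)
```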
@@ -193,7 +196,7 @@ def create_model_menus():
 
     with gr.Row():
         with gr.Column():
-            shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=["Transformers", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp"], value=None)
+            shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=["Transformers", "AutoGPTQ", "GPTQ-for-LLaMa", "ExLlama", "llama.cpp"], value=None)
             with gr.Box():
                 with gr.Row():
                     with gr.Column():
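The dropdown's string value is what downstream code dispatches on, so adding "ExLlama" here implies the loading code must recognize that key. A minimal, hypothetical dispatch sketch; the function and backend names are placeholders, not the web UI's actual entry points:

```python
def load_with(loader: str, model_name: str) -> str:
    # Map each dropdown choice to a backend identifier (illustrative).
    backends = {
        "Transformers": "transformers",
        "AutoGPTQ": "auto_gptq",
        "GPTQ-for-LLaMa": "gptq_for_llama",
        "ExLlama": "exllama",
        "llama.cpp": "llama_cpp",
    }
    if loader not in backends:
        raise ValueError(f"Unknown loader: {loader}")
    return f"loading {model_name} with the {backends[loader]} backend"


print(load_with("ExLlama", "llama-7b-4bit"))
```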
@@ -213,6 +216,7 @@ def create_model_menus():
                         shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gptj"], value=shared.args.model_type or "None")
                         shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0)
                         shared.gradio['autogptq_info'] = gr.Markdown('On some systems, AutoGPTQ can be 2x slower than GPTQ-for-LLaMa. You can manually select the GPTQ-for-LLaMa loader above.')
+                        shared.gradio['exllama_info'] = gr.Markdown('ExLlama has to be installed manually. See the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/ExLlama')
 
                     with gr.Column():
                         shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton)
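Since the ExLlama install hint only applies to one loader, a natural follow-up is to toggle its visibility from the dropdown. A small sketch of that pattern using Gradio's `gr.update`; this wiring is an assumption for illustration, not the commit's actual visibility handling:

```python
import gradio as gr

with gr.Blocks() as demo:
    loader = gr.Dropdown(
        label="Model loader",
        choices=["Transformers", "AutoGPTQ", "GPTQ-for-LLaMa", "ExLlama", "llama.cpp"],
        value=None,
    )
    exllama_info = gr.Markdown("ExLlama has to be installed manually.",
                               visible=False)

    # Show the install hint only while ExLlama is the selected loader.
    loader.change(lambda choice: gr.update(visible=(choice == "ExLlama")),
                  loader, exllama_info)

demo.launch()
```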