AutoGPTQ: Add --disable_exllamav2 flag (Mixtral CPU offloading needs this)
This commit is contained in:
parent
7de10f4c8e
commit
3bbf6c601d
7 changed files with 16 additions and 4 deletions
|
@@ -52,6 +52,7 @@ def load_quantized(model_name):
|
|||
'quantize_config': quantize_config,
|
||||
'use_cuda_fp16': not shared.args.no_use_cuda_fp16,
|
||||
'disable_exllama': shared.args.disable_exllama,
|
||||
'disable_exllamav2': shared.args.disable_exllamav2,
|
||||
}
|
||||
|
||||
logger.info(f"The AutoGPTQ params are: {params}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue