AutoGPTQ: Add --disable_exllamav2 flag (Mixtral CPU offloading needs this)

This commit is contained in:
oobabooga 2023-12-15 06:46:13 -08:00
parent 7de10f4c8e
commit 3bbf6c601d
7 changed files with 16 additions and 4 deletions

View file

@@ -25,6 +25,7 @@ loaders_and_params = OrderedDict({
'rope_freq_base',
'compress_pos_emb',
'disable_exllama',
'disable_exllamav2',
'transformers_info'
],
'llama.cpp': [
@@ -94,6 +95,7 @@ loaders_and_params = OrderedDict({
'groupsize',
'desc_act',
'disable_exllama',
'disable_exllamav2',
'gpu_memory',
'cpu_memory',
'cpu',