Use AutoGPTQ by default for GPTQ models

2023-06-05 15:41:48 -03:00 · 2023-06-05 15:41:48 -03:00 · f276d88546
commit f276d88546
parent 632571a009
5 changed files with 19 additions and 21 deletions
--- a/modules/models.py
+++ b/modules/models.py
@@ -81,10 +81,10 @@ def load_model(model_name):
        logger.error('The path to the model does not exist. Exiting.')
        return None, None

-    if shared.args.autogptq:
-        load_func = AutoGPTQ_loader
-    elif shared.args.wbits > 0:
+    if shared.args.gptq_for_llama:
        load_func = GPTQ_loader
+    elif Path(f'{shared.args.model_dir}/{model_name}/quantize_config.json').exists() or shared.args.wbits > 0:
+        load_func = AutoGPTQ_loader
    elif shared.model_type == 'llamacpp':
        load_func = llamacpp_loader
    elif shared.model_type == 'rwkv':
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -141,7 +141,8 @@ parser.add_argument('--warmup_autotune', action='store_true', help='(triton) Ena
 parser.add_argument('--fused_mlp', action='store_true', help='(triton) Enable fused mlp.')

 # AutoGPTQ
-parser.add_argument('--autogptq', action='store_true', help='Use AutoGPTQ for loading quantized models instead of the internal GPTQ loader.')
+parser.add_argument('--gptq-for-llama', action='store_true', help='Use GPTQ-for-LLaMa to load the GPTQ model instead of AutoGPTQ.')
+parser.add_argument('--autogptq', action='store_true', help='DEPRECATED')
 parser.add_argument('--triton', action='store_true', help='Use triton.')
 parser.add_argument('--desc_act', action='store_true', help='For models that don\'t have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig.')

@@ -181,12 +182,9 @@ parser.add_argument('--multimodal-pipeline', type=str, default=None, help='The m
 args = parser.parse_args()
 args_defaults = parser.parse_args([])

-# Deprecation warnings for parameters that have been renamed
-deprecated_dict = {}
-for k in deprecated_dict:
-    if getattr(args, k) != deprecated_dict[k][1]:
-        logger.warning(f"--{k} is deprecated and will be removed. Use --{deprecated_dict[k][0]} instead.")
-        setattr(args, deprecated_dict[k][0], getattr(args, k))
+# Deprecation warnings
+if args.autogptq:
+    logger.warning('--autogptq has been deprecated and will be removed soon. AutoGPTQ is now used by default for GPTQ models.')

 # Security warnings
 if args.trust_remote_code:
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -30,7 +30,7 @@ theme = gr.themes.Default(


 def list_model_elements():
-    elements = ['cpu_memory', 'auto_devices', 'disk', 'cpu', 'bf16', 'load_in_8bit', 'trust_remote_code', 'load_in_4bit', 'compute_dtype', 'quant_type', 'use_double_quant', 'wbits', 'groupsize', 'model_type', 'pre_layer', 'autogptq', 'triton', 'desc_act', 'threads', 'n_batch', 'no_mmap', 'mlock', 'n_gpu_layers', 'n_ctx', 'llama_cpp_seed']
+    elements = ['cpu_memory', 'auto_devices', 'disk', 'cpu', 'bf16', 'load_in_8bit', 'trust_remote_code', 'load_in_4bit', 'compute_dtype', 'quant_type', 'use_double_quant', 'gptq_for_llama', 'wbits', 'groupsize', 'model_type', 'pre_layer', 'triton', 'desc_act', 'threads', 'n_batch', 'no_mmap', 'mlock', 'n_gpu_layers', 'n_ctx', 'llama_cpp_seed']
    for i in range(torch.cuda.device_count()):
        elements.append(f'gpu_memory_{i}')