AutoAWQ: initial support (#3999)
This commit is contained in:
parent
3f56151f03
commit
cc632c3f33
8 changed files with 75 additions and 3 deletions
|
@ -63,6 +63,7 @@ def load_model(model_name, loader=None):
|
|||
'ExLlamav2': ExLlamav2_loader,
|
||||
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
||||
'ctransformers': ctransformers_loader,
|
||||
'AutoAWQ': AutoAWQ_loader,
|
||||
}
|
||||
|
||||
if loader is None:
|
||||
|
@ -276,6 +277,24 @@ def ctransformers_loader(model_name):
|
|||
model, tokenizer = ctrans.from_pretrained(model_file)
|
||||
return model, tokenizer
|
||||
|
||||
def AutoAWQ_loader(model_name):
    """Load an AWQ-quantized model from disk via AutoAWQ and return it.

    Mirrors the other ``*_loader`` functions in this module: ``model_name``
    is a directory name under ``shared.args.model_dir``. Returns the loaded
    model object only (no tokenizer), matching how the caller dispatches
    through the loaders dict.
    """
    # Imported lazily so the module stays usable when AutoAWQ isn't installed.
    from awq import AutoAWQForCausalLM

    model_dir = Path(f'{shared.args.model_dir}/{model_name}')

    # Best-effort: warn and continue rather than abort, preserving the
    # original behavior of still attempting to load the model.
    if shared.args.deepspeed:
        # Fix: logger.warn is a deprecated alias of logger.warning.
        logger.warning("AutoAWQ is incompatible with deepspeed")

    model = AutoAWQForCausalLM.from_quantized(
        quant_path=model_dir,
        max_new_tokens=shared.args.max_seq_len,
        trust_remote_code=shared.args.trust_remote_code,
        # fuse_layers enables AutoAWQ's fused attention unless disabled.
        fuse_layers=not shared.args.no_inject_fused_attention,
        max_memory=get_max_memory_dict(),
        batch_size=shared.args.n_batch,
        # NOTE(review): tying `safetensors` to `trust_remote_code` looks
        # accidental — whether the checkpoint uses safetensors is unrelated
        # to trusting remote code. Kept as-is to preserve behavior; consider
        # `safetensors=any(model_dir.glob('*.safetensors'))` instead — confirm.
        safetensors=not shared.args.trust_remote_code)

    return model
|
||||
|
||||
def GPTQ_loader(model_name):
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue