AutoAWQ: initial support (#3999)

This commit is contained in:
cal066 2023-10-05 16:19:18 +00:00 committed by GitHub
parent 3f56151f03
commit cc632c3f33
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 75 additions and 3 deletions

View file

@ -63,6 +63,7 @@ def load_model(model_name, loader=None):
'ExLlamav2': ExLlamav2_loader,
'ExLlamav2_HF': ExLlamav2_HF_loader,
'ctransformers': ctransformers_loader,
'AutoAWQ': AutoAWQ_loader,
}
if loader is None:
@ -276,6 +277,24 @@ def ctransformers_loader(model_name):
model, tokenizer = ctrans.from_pretrained(model_file)
return model, tokenizer
def AutoAWQ_loader(model_name):
    """Load a quantized model through AutoAWQ.

    The model is read from ``<shared.args.model_dir>/<model_name>`` and the
    load options are taken from ``shared.args``.

    Args:
        model_name: Name of the model sub-directory inside the model dir.

    Returns:
        The object returned by ``AutoAWQForCausalLM.from_quantized``. Only
        the model is returned; no tokenizer is loaded here.
    """
    # Imported inside the function so that `awq` is only needed when this
    # loader is actually called.
    from awq import AutoAWQForCausalLM

    model_dir = Path(f'{shared.args.model_dir}/{model_name}')

    # The deepspeed flag is only reported; loading continues regardless.
    if shared.args.deepspeed:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("AutoAWQ is incompatible with deepspeed")

    # NOTE(review): safetensors is turned off whenever trust_remote_code is
    # set -- confirm this coupling is intended.
    model = AutoAWQForCausalLM.from_quantized(
        quant_path=model_dir,
        max_new_tokens=shared.args.max_seq_len,
        trust_remote_code=shared.args.trust_remote_code,
        fuse_layers=not shared.args.no_inject_fused_attention,
        max_memory=get_max_memory_dict(),
        batch_size=shared.args.n_batch,
        safetensors=not shared.args.trust_remote_code,
    )

    return model
def GPTQ_loader(model_name):