AutoAWQ: initial support (#3999)
This commit is contained in:
parent
3f56151f03
commit
cc632c3f33
8 changed files with 75 additions and 3 deletions
|
@ -63,6 +63,7 @@ def load_model(model_name, loader=None):
|
|||
'ExLlamav2': ExLlamav2_loader,
|
||||
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
||||
'ctransformers': ctransformers_loader,
|
||||
'AutoAWQ': AutoAWQ_loader,
|
||||
}
|
||||
|
||||
if loader is None:
|
||||
|
@ -276,6 +277,24 @@ def ctransformers_loader(model_name):
|
|||
model, tokenizer = ctrans.from_pretrained(model_file)
|
||||
return model, tokenizer
|
||||
|
||||
def AutoAWQ_loader(model_name):
    """Load an AWQ-quantized model from disk via AutoAWQ and return it.

    Mirrors the other ``*_loader`` functions in this module: ``model_name``
    is a directory name under ``shared.args.model_dir``. Returns the loaded
    model object only (no tokenizer), matching how the caller dispatches
    through the loaders dict.
    """
    # Imported lazily so the module stays usable when AutoAWQ isn't installed.
    from awq import AutoAWQForCausalLM

    model_dir = Path(f'{shared.args.model_dir}/{model_name}')

    # Best-effort: warn and continue rather than abort, preserving the
    # original behavior of still attempting to load the model.
    if shared.args.deepspeed:
        # Fix: logger.warn is a deprecated alias of logger.warning.
        logger.warning("AutoAWQ is incompatible with deepspeed")

    model = AutoAWQForCausalLM.from_quantized(
        quant_path=model_dir,
        max_new_tokens=shared.args.max_seq_len,
        trust_remote_code=shared.args.trust_remote_code,
        # fuse_layers enables AutoAWQ's fused attention unless disabled.
        fuse_layers=not shared.args.no_inject_fused_attention,
        max_memory=get_max_memory_dict(),
        batch_size=shared.args.n_batch,
        # NOTE(review): tying `safetensors` to `trust_remote_code` looks
        # accidental — whether the checkpoint uses safetensors is unrelated
        # to trusting remote code. Kept as-is to preserve behavior; consider
        # `safetensors=any(model_dir.glob('*.safetensors'))` instead — confirm.
        safetensors=not shared.args.trust_remote_code)

    return model
|
||||
|
||||
def GPTQ_loader(model_name):
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue