Add HQQ quant loader (#4888)
--------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
This commit is contained in:
parent
64a57d9dc2
commit
674be9a09a
16 changed files with 79 additions and 0 deletions
|
@ -73,6 +73,7 @@ def load_model(model_name, loader=None):
|
|||
'ctransformers': ctransformers_loader,
|
||||
'AutoAWQ': AutoAWQ_loader,
|
||||
'QuIP#': QuipSharp_loader,
|
||||
'HQQ': HQQ_loader,
|
||||
}
|
||||
|
||||
metadata = get_model_metadata(model_name)
|
||||
|
@ -411,6 +412,18 @@ def ExLlamav2_HF_loader(model_name):
|
|||
return Exllamav2HF.from_pretrained(model_name)
|
||||
|
||||
|
||||
def HQQ_loader(model_name):
    '''
    Load an HQQ-quantized causal LM stored under
    <shared.args.model_dir>/<model_name>, then point HQQLinear at the
    HQQBackend member named by shared.args.hqq_backend.

    Returns the model object produced by HQQModelForCausalLM.from_quantized.
    '''
    from hqq.engine.hf import HQQModelForCausalLM
    from hqq.core.quantize import HQQLinear, HQQBackend

    logger.info(f"Loading HQQ model with backend: {shared.args.hqq_backend}")

    # from_quantized is handed a plain string rather than a Path object
    checkpoint_path = str(Path(f'{shared.args.model_dir}/{model_name}'))
    hqq_model = HQQModelForCausalLM.from_quantized(checkpoint_path)

    # The backend name is resolved and applied only after the model has loaded
    selected_backend = getattr(HQQBackend, shared.args.hqq_backend)
    HQQLinear.set_backend(selected_backend)

    return hqq_model
|
||||
|
||||
|
||||
def RWKV_loader(model_name):
|
||||
'''
|
||||
This loader is not currently maintained as RWKV can now be loaded
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue