Add HQQ quant loader (#4888)

---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
This commit is contained in:
Water 2023-12-18 19:23:16 -05:00 committed by GitHub
parent 64a57d9dc2
commit 674be9a09a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 79 additions and 0 deletions

View file

@ -144,6 +144,9 @@ parser.add_argument('--pre_layer', type=int, nargs='+', help='The number of laye
parser.add_argument('--checkpoint', type=str, help='The path to the quantized checkpoint file. If not specified, it will be automatically detected.')
parser.add_argument('--monkey-patch', action='store_true', help='Apply the monkey patch for using LoRAs with quantized models.')
# HQQ
parser.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
# DeepSpeed
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
@ -246,6 +249,8 @@ def fix_loader_name(name):
return 'AutoAWQ'
elif name in ['quip#', 'quip-sharp', 'quipsharp', 'quip_sharp']:
return 'QuIP#'
elif name in ['hqq']:
return 'HQQ'
def add_extension(name, last=False):